This notebook estimates the indicators based on the raw data and perfomrs the main analyses and figures used in the manuscript of the multicountry paper. The input is the “clean kobo output” that was first cleaned by 1.2_cleaning.

Packages and functions

Load required libraries:

library(tidyr)
library(dplyr)
library(readr)
library(utile.tools)
library(stringr)
library(ggplot2)
library(ggsankey)
library(ggnewscale)
library(alluvial)
library(viridis)
library(cowplot)
library(lme4)
library(knitr)
library(glmmTMB)

Load required functions. These custom fuctions are available at: https://github.com/AliciaMstt/GeneticIndicators

source("get_indicator1_data.R")
source("get_indicator2_data.R")
source("get_indicator3_data.R")
source("get_metadata.R")
source("transform_to_Ne.R")
source("estimate_indicator1.R")

Other custom functions:

### not in
'%!in%' <- function(x,y)!('%in%'(x,y))


#' Duplicates data to create additional facet. Thanks to https://stackoverflow.com/questions/18933575/easily-add-an-all-facet-to-facet-wrap-in-ggplot2
#' @param df a dataframe
#' @param col the name of facet column
#'  
CreateAllFacet <- function(df, col){
  df$facet <- df[[col]]
  temp <- df
  temp$facet <- "all"
  merged <-rbind(temp, df)

  # ensure the facet value is a factor
  merged[[col]] <- as.factor(merged[[col]])

  return(merged)
}

Custom colors:

## IUCN official colors
# Assuming order of levels is: "re", "cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown" (for regional, and w/o "re" for global). Make sure to change the levels to that order before plotting.
IUCNcolors<-c("brown2", "darkorange", "yellow", "green", "darkgreen", "darkgrey", "azure2", "bisque1")
IUCNcolors_regional<-c("darkorchid2", "brown2", "darkorange", "yellow", "green", "darkgreen", "darkgrey", "azure2", "bisque1")

## nice soft ramp for taxonomic groups
taxoncolors<-cividis(12) # same than using cividis(length(levels(as.factor(metadata$taxonomic_group))))

## Colors for simplified methods to define populations 
# assuming the levels (see how this was created in the section "Simplify combinations of methods to define populations"): of running levels(as.factor(ind2_data$defined_populations_simplified)) (after new order)

# get a set of colors to highlight genetic and geographic with similar colors

simplifiedmethods_colors<-c("#FFA07A", #"dispersal_buffer"
                            "#7f611b", # "eco_biogeo_proxies"
                            "#668cd1", # "genetic_clusters"     
                            "#668cd1", # "genetic_clusters eco_biogeo_proxies"     
                            "#45c097", # "genetic_clusters geographic_boundaries"  
                            "#d4b43e", # "geographic_boundaries"
                            "#d4b43e", # "geographic_boundaries eco_biogeo_proxies"
                            "#d4b43e", # "geographic_boundaries management_units" 
                            "#b34656", # "management_units" 
                            "#be72c9", # "other" 
                            "#be72c9")# "other_combinations" 

grouped_taxon_colors<-c("#9f43c8", "#91c637", "#e5463c")

Get data

Get indicators and metadata data from clean kobo output

# Get data:
kobo_clean<-read.csv(file="kobo_output_clean.csv", header=TRUE)

# Extract indicator 1 data from kobo output, show most relevant columns
ind1_data<-get_indicator1_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind1_data[,c(1:3, 12:14)])
# Extract Proportion of maintained populations (indicator) data from kobo output, show most relevant columns
ind2_data<-get_indicator2_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind2_data[,c(1:3, 9:10,13)])
# Extract indicator 3 data from kobo output, show most relevant columns
ind3_data<-get_indicator3_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind3_data[,c(1:3, 9:11)])
# extract metadata, show most relevant columns
metadata<-get_metadata(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(metadata[,c(1:3, 12, 25,26, 64)])

Get population data for those species assessed using the tabular text template instead of Kobo. This file was produced by the script 1.2_cleaning.Rmd

ind1_data_from_templates<-read.csv(file="ind1_data_from_templates.csv")

Add data recorded using the population template to the ind1_data already in the nice format.

ind1_data<-rbind(ind1_data, ind1_data_from_templates)

Estimate indicators

Indicator 1 (proportion of populations with Ne >500):

Show most relevant columns of indicator 1 data

head(ind1_data[,c(1:3, 12:14)])

Remember what the function to transform NcRange and NcPoint data into Ne does:

# check what the custom funciton does
transform_to_Ne
## function (ind1_data, ratio = 0.1) 
## {
##     ratio = ratio
##     if (!is.numeric(ratio) || ratio < 0 || ratio > 1) {
##         stop("Invalid argument. Please provide a number within the range 0 to 1, using `.` to delimit decimals.")
##     }
##     else {
##         ind1_data = ind1_data
##         ind1_data <- ind1_data %>% mutate(Nc_from_range = case_when(NcRange == 
##             "more_5000_bymuch" ~ 10000, NcRange == "more_5000" ~ 
##             5500, NcRange == "less_5000_bymuch" ~ 500, NcRange == 
##             "less_5000" ~ 4050, NcRange == "range_includes_5000" ~ 
##             5001)) %>% mutate(Ne_from_Nc = case_when(!is.na(NcPoint) ~ 
##             NcPoint * ratio, !is.na(Nc_from_range) ~ Nc_from_range * 
##             ratio)) %>% mutate(Ne_combined = if_else(is.na(Ne), 
##             Ne_from_Nc, Ne)) %>% mutate(Ne_calculated_from = if_else(is.na(Ne), 
##             if_else(!is.na(NcPoint), "NcPoint ratio", if_else(!is.na(Nc_from_range), 
##                 "NcRange ratio", NA_character_)), "genetic data"))
##         print(ind1_data)
##     }
## }

Use function to get Ne data from NcRange or NcPoint data, and their combination (Ne estimated from Ne if Ne is available, otherwise, from Nc)

ind1_data<-transform_to_Ne(ind1_data = ind1_data, ratio = 0.1)
## # A tibble: 5,652 × 40
##    country_assessme… taxonomic_group taxon scientific_auth… genus year_assesment
##    <chr>             <chr>           <chr> <chr>            <chr> <chr>         
##  1 sweden            mammal          Alce… (Linnaeus, 1758) Alces 2023          
##  2 sweden            mammal          Alce… (Linnaeus, 1758) Alces 2023          
##  3 sweden            mammal          Alce… (Linnaeus, 1758) Alces 2023          
##  4 sweden            fish            Silu… (Linnaeus, 1758) Silu… 2023          
##  5 sweden            fish            Silu… (Linnaeus, 1758) Silu… 2023          
##  6 sweden            fish            Silu… (Linnaeus, 1758) Silu… 2023          
##  7 sweden            fish            Silu… (Linnaeus, 1758) Silu… 2023          
##  8 sweden            fish            Silu… (Linnaeus, 1758) Silu… 2023          
##  9 sweden            fish            Silu… (Linnaeus, 1758) Silu… 2023          
## 10 sweden            bird            Dend… Bechstein 1803   Dend… 2022          
## # … with 5,642 more rows, and 34 more variables: name_assessor <chr>,
## #   email_assessor <chr>, kobo_tabular <chr>, defined_populations <chr>,
## #   time_populations <chr>, X_validation_status <chr>, X_uuid <chr>,
## #   multiassessment <chr>, population <chr>, Name <chr>, Origin <chr>,
## #   IntroductionYear <chr>, Ne <dbl>, NeLower <dbl>, NeUpper <dbl>,
## #   NeYear <chr>, GeneticMarkers <chr>, GeneticMarkersOther <chr>,
## #   MethodNe <chr>, SourceNe <chr>, NcType <chr>, NcYear <chr>, …

Remember what the function to estimate indicator 1 does:

# check what the custom function does
estimate_indicator1
## function (ind1_data) 
## {
##     indicator1 <- ind1_data %>% group_by(X_uuid, ) %>% summarise(n_pops = n(), 
##         n_pops_Ne_data = sum(!is.na(Ne_combined)), n_pops_more_500 = sum(Ne_combined > 
##             500, na.rm = TRUE), indicator1 = n_pops_more_500/n_pops_Ne_data) %>% 
##         left_join(metadata)
##     print(indicator1)
## }

Now estimate indicator 1 :)

indicator1<-estimate_indicator1(ind1_data = ind1_data)
## Joining, by = "X_uuid"
## # A tibble: 609 × 69
##    X_uuid      n_pops n_pops_Ne_data n_pops_more_500 indicator1 country_assessm…
##    <chr>        <int>          <int>           <int>      <dbl> <chr>           
##  1 010d85cd-5…      2              1               1          1 united_states   
##  2 018d6a54-b…     47             46               0          0 united_states   
##  3 019bd95f-b…      1              1               0          0 sweden          
##  4 01b10b29-9…      1              1               1          1 south_africa    
##  5 0301e6b3-b…      3              3               3          1 france          
##  6 037d6c8f-7…      4              2               2          1 united_states   
##  7 03f03179-1…      1              1               1          1 south_africa    
##  8 0586b61e-7…     12             12               0          0 belgium         
##  9 065a53ba-0…      1              1               0          0 south_africa    
## 10 06e6bb50-3…      1              1               0          0 belgium         
## # … with 599 more rows, and 63 more variables: taxonomic_group <chr>,
## #   taxon <chr>, scientific_authority <chr>, genus <chr>, year_assesment <chr>,
## #   name_assessor <chr>, email_assessor <chr>, common_name <chr>,
## #   kobo_tabular <chr>, X_validation_status <chr>, GBIF_taxonID <int>,
## #   NCBI_taxonID <chr>, national_taxonID <chr>, source_national_taxonID <chr>,
## #   other_populations <chr>, time_populations <chr>, defined_populations <chr>,
## #   source_definition_populations <chr>, map_populations <chr>, …

Proportion of maintained populations (indicator 2) = proportion of populations within species which are maintained.

Proportion of maintained populations (indicator) is the he proportion of populations within species which are maintained. This can be estimated based on the n_extant_populations and n_extint_populations, as follows:

ind2_data$indicator2<- ind2_data$n_extant_populations / (ind2_data$n_extant_populations + ind2_data$n_extint_populations)
head(ind2_data$indicator2)
## [1] 1.0000000 0.5000000 0.2941176 1.0000000 0.3333333 1.0000000

Number of taxa with genetic monitoring squemes (indicator3)

Indicator 3 refers to the number (count) of taxa by country in which genetic monitoring is occurring. This is stored in the variable temp_gen_monitoring as a “yes/no” answer for each taxon, so to estimate the indicator, we only need to count how many said “yes”, keeping only one of the records when the taxon was multiassessed:

indicator3<-ind3_data %>%
                 # keep only one record if the taxon was assessed more than once within the country
                 select(country_assessment, taxon, temp_gen_monitoring) %>%
                 filter(!duplicated(.)) %>%

                 # count "yes" in tem_gen_monitoring by country
                 filter(temp_gen_monitoring=="yes") %>%
                 group_by(country_assessment) %>%
                 summarise(n_taxon_gen_monitoring= n())

Join indicators and metadata in a single table

It could be useful to have the estimated indicator and the metadata in a single large table.

indicators_full<-left_join(metadata, indicator1) %>% 
                     left_join(ind2_data) %>% 
                     left_join(ind3_data)
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "common_name", "kobo_tabular", "X_validation_status",
## "X_uuid", "GBIF_taxonID", "NCBI_taxonID", "national_taxonID",
## "source_national_taxonID", "other_populations", "time_populations",
## "defined_populations", "source_definition_populations", "map_populations",
## "map_populations_URL", "habitat_decline_area", "source_populations",
## "popsize_data", "ne_pops_exists", "nc_pops_exists", "ratio_exists",
## "species_related", "ratio_species_related", "ratio_year",
## "source_popsize_ratios", "species_comments", "realm", "IUCN_habitat",
## "other_habitat", "national_endemic", "transboundary_type", "other_explain",
## "country_proportion", "species_range", "rarity", "occurrence_extent",
## "occurrence_area", "pop_fragmentation_level", "species_range_comments",
## "global_IUCN", "regional_redlist", "other_assessment_status",
## "other_assessment_name", "source_status_distribution", "fecundity",
## "semelparous_offpring", "reproductive_strategy", "reproductive_strategy_other",
## "adult_age_data", "other_reproductive_strategy", "longevity_max",
## "longevity_median", "longevity_maturity", "longevity_age",
## "life_history_based_on", "life_history_sp_basedon", "sources_life_history",
## "multiassessment")
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "X_validation_status", "X_uuid", "other_populations",
## "time_populations", "defined_populations", "source_definition_populations",
## "map_populations", "map_populations_URL", "habitat_decline_area",
## "source_populations", "multiassessment")
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "X_validation_status", "X_uuid", "multiassessment")

Save indicators data

Save indicators data and metadata to csv files, useful for analyses outside R.

# save processed data
write.csv(ind1_data, "ind1_data.csv", row.names = FALSE)
write.csv(indicators_full, "indicators_full.csv", row.names = FALSE)
write.csv(ind2_data, "ind2_data.csv", row.names = FALSE)
write.csv(ind3_data, "ind3_data.csv", row.names = FALSE)
write.csv(metadata, "metadata.csv", row.names = FALSE)

Change country name to nicer labels

To have nice levels in the plots we will change the way country names are written:

# make factor
metadata$country_assessment<-as.factor(metadata$country_assessment)
indicators_full$country_assessment<-as.factor(indicators_full$country_assessment)
ind2_data$country_assessment<-as.factor(ind2_data$country_assessment)
ind1_data$country_assessment<-as.factor(ind1_data$country_assessment)
indicator1$country_assessment<-as.factor(indicator1$country_assessment)

# original levels
levels(metadata$country_assessment)
## [1] "australia"     "belgium"       "colombia"      "france"       
## [5] "japan"         "mexico"        "south_africa"  "sweden"       
## [9] "united_states"
# change
levels(metadata$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(indicators_full$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(ind1_data$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(ind2_data$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(indicator1$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")

Simplify combinations of methods to define populations

The methods used to define populations come from a check box question were one or more of the following categories can be selected: genetic_clusters, geographic_boundaries, eco_biogeo_proxies, adaptive_traits, management_units, other. As a consequence any combination of the former can be possible. Leading to the following frequency table:

table(indicators_full$defined_populations)
## 
##                                                                            adaptive_traits 
##                                                                                          5 
##                                                           adaptive_traits management_units 
##                                                                                          1 
##                                                                           dispersal_buffer 
##                                                                                        159 
##                                                           dispersal_buffer adaptive_traits 
##                                                                                          2 
##                                                        dispersal_buffer eco_biogeo_proxies 
##                                                                                          1 
##                                                                     dispersal_buffer other 
##                                                                                          1 
##                                                                         eco_biogeo_proxies 
##                                                                                         44 
##                                                         eco_biogeo_proxies adaptive_traits 
##                                                                                          3 
##                                                        eco_biogeo_proxies dispersal_buffer 
##                                                                                          7 
##                                                        eco_biogeo_proxies management_units 
##                                                                                          3 
##                                                                   eco_biogeo_proxies other 
##                                                                                          2 
##                                                                           genetic_clusters 
##                                                                                        108 
##                                                           genetic_clusters adaptive_traits 
##                                                                                          7 
##                                                          genetic_clusters dispersal_buffer 
##                                                                                         11 
##                                                        genetic_clusters eco_biogeo_proxies 
##                                                                                         26 
##                                        genetic_clusters eco_biogeo_proxies adaptive_traits 
##                                                                                          3 
##                       genetic_clusters eco_biogeo_proxies adaptive_traits management_units 
##                                                                                          2 
##                                       genetic_clusters eco_biogeo_proxies management_units 
##                                                                                          1 
##                                                     genetic_clusters geographic_boundaries 
##                                                                                         70 
##                                     genetic_clusters geographic_boundaries adaptive_traits 
##                                                                                          5 
##                                  genetic_clusters geographic_boundaries eco_biogeo_proxies 
##                                                                                          8 
##                  genetic_clusters geographic_boundaries eco_biogeo_proxies adaptive_traits 
##                                                                                          1 
## genetic_clusters geographic_boundaries eco_biogeo_proxies adaptive_traits management_units 
##                                                                                          1 
##                 genetic_clusters geographic_boundaries eco_biogeo_proxies management_units 
##                                                                                          1 
##                                    genetic_clusters geographic_boundaries management_units 
##                                                                                          8 
##                                                          genetic_clusters management_units 
##                                                                                          5 
##                                                                     genetic_clusters other 
##                                                                                          2 
##                                                                      geographic_boundaries 
##                                                                                        274 
##                                                      geographic_boundaries adaptive_traits 
##                                                                                         12 
##                               geographic_boundaries adaptive_traits management_units other 
##                                                                                          1 
##                                                     geographic_boundaries dispersal_buffer 
##                                                                                          1 
##                                                   geographic_boundaries eco_biogeo_proxies 
##                                                                                        114 
##                                   geographic_boundaries eco_biogeo_proxies adaptive_traits 
##                                                                                          3 
##                                  geographic_boundaries eco_biogeo_proxies management_units 
##                                                                                          3 
##                                             geographic_boundaries eco_biogeo_proxies other 
##                                                                                          2 
##                                                     geographic_boundaries management_units 
##                                                                                         24 
##                                                                geographic_boundaries other 
##                                                                                         12 
##                                                                           management_units 
##                                                                                         29 
##                                                                     management_units other 
##                                                                                          1 
##                                                                                      other 
##                                                                                         19

It is hard to group the above methods, so we will keep the original groups with n >=19 in the above list, and tag the combinations that appear few times as as “other_combinations”.

Which groups have n>=19?

x<-as.data.frame(table(indicators_full$defined_populations)[table(indicators_full$defined_populations) >= 19])
colnames(x)[1]<-"method"

x

We can add this new column to the metadata and indicator data:

### for indicators 
indicators_full<- indicators_full %>% 
  mutate(defined_populations_simplified = case_when(
         # if the method is in the list of methods n>=19 then keep it
         defined_populations %in% x$method ~ defined_populations,
         TRUE ~ "other_combinations"))


### for meta
metadata<- metadata %>% 
  mutate(defined_populations_simplified = case_when(
         # if the method is in the list of methods n>=19 then keep it
         defined_populations %in% x$method ~ defined_populations,
         TRUE ~ "other_combinations"))

### for ind1 raw data
ind1_data<- ind1_data %>% 
  mutate(defined_populations_simplified = case_when(
         # if the method is in the list of methods n>=19 then keep it
         defined_populations %in% x$method ~ defined_populations,
         TRUE ~ "other_combinations"))

Check n for simplified methods:

table(indicators_full$defined_populations_simplified)
## 
##                         dispersal_buffer 
##                                      159 
##                       eco_biogeo_proxies 
##                                       44 
##                         genetic_clusters 
##                                      108 
##      genetic_clusters eco_biogeo_proxies 
##                                       26 
##   genetic_clusters geographic_boundaries 
##                                       70 
##                    geographic_boundaries 
##                                      274 
## geographic_boundaries eco_biogeo_proxies 
##                                      114 
##   geographic_boundaries management_units 
##                                       24 
##                         management_units 
##                                       29 
##                                    other 
##                                       19 
##                       other_combinations 
##                                      115

Table of equivalences:

indicators_full %>% 
       select(defined_populations, defined_populations_simplified) %>% 
       filter(!duplicated(defined_populations))

Create nicer names for ploting

# original method names
levels(as.factor(indicators_full$defined_populations_simplified))
##  [1] "dispersal_buffer"                        
##  [2] "eco_biogeo_proxies"                      
##  [3] "genetic_clusters"                        
##  [4] "genetic_clusters eco_biogeo_proxies"     
##  [5] "genetic_clusters geographic_boundaries"  
##  [6] "geographic_boundaries"                   
##  [7] "geographic_boundaries eco_biogeo_proxies"
##  [8] "geographic_boundaries management_units"  
##  [9] "management_units"                        
## [10] "other"                                   
## [11] "other_combinations"
# nicer names
nice_names <- c("dispersal buffer",
                "eco- biogeographic proxies",
                 "genetic clusters",
                 "genetic clusters & eco- biogeographic proxies",
                 "genetic clusters & geographic boundaries",
                 "geographic boundaries",
                 "geographic boundaries & eco- biogeographic proxies",
                 "geographic boundaries & management units",
                 "management units",
                 "other", 
                 "other combinations")


### add them
indicators_full$defined_populations_nicenames <- factor(
    indicators_full$defined_populations_simplified,
    levels = levels(as.factor(indicators_full$defined_populations_simplified)),
    labels = nice_names)

# metadata
metadata$defined_populations_nicenames <- factor(
    metadata$defined_populations_simplified,
    levels = levels(as.factor(metadata$defined_populations_simplified)),
    labels = nice_names)

#check names match
select(metadata, defined_populations_nicenames, defined_populations_simplified)
levels(indicators_full$defined_populations_nicenames)
##  [1] "dispersal buffer"                                  
##  [2] "eco- biogeographic proxies"                        
##  [3] "genetic clusters"                                  
##  [4] "genetic clusters & eco- biogeographic proxies"     
##  [5] "genetic clusters & geographic boundaries"          
##  [6] "geographic boundaries"                             
##  [7] "geographic boundaries & eco- biogeographic proxies"
##  [8] "geographic boundaries & management units"          
##  [9] "management units"                                  
## [10] "other"                                             
## [11] "other combinations"

Averaging multiassessments

Some taxa were assessed twice or more times, for example to account for uncertainty on how to divide populations. This information is stored in variable multiassessment of the metadata (created by get_metadata()). An example of taxa with multiple assessments:

metadata %>%
filter(multiassessment=="multiassessment")  %>%
  select(taxonomic_group, taxon, country_assessment, multiassessment) %>%
  arrange(taxon, country_assessment) %>%
  head()

Multiassessments allow to account for uncertainty in the number of populations or the size of them. We can examine how the indicators value species by species as done elsewhere in these analyses (see below “Values for indicator 1 and 2 for multiassessed species), but to examine global trends, some of the figures below use the average. The averages are stored in a different column, labeled indicator[1 or 2]_mean.

indicators_averaged<-indicators_full %>%
  # group desired multiassessments
  group_by(country_assessment, multiassessment, taxon) %>%
  # estimate means
  mutate(indicator1_mean=mean(indicator1, na.rm=TRUE)) %>%
  mutate(indicator2_mean=mean(indicator2, na.rm=TRUE)) %>%
  # change NaN for NA (needed due to the NAs and 0s in the dataset)
  mutate_all(~ifelse(is.nan(.), NA, .)) 
## `mutate_all()` ignored the following grouping variables:
## • Columns `country_assessment`, `multiassessment`, `taxon`
## ℹ Use `mutate_at(df, vars(-group_cols()), myoperation)` to silence the message.

Examples of how this looks to check it was done properly. For indicator 1:

indicators_averaged %>%
  filter(taxon == "Barbastella barbastellus") %>%
  select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean)
indicators_averaged %>%
  filter(taxon == "Rana dalmatina") %>%
  select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean)
indicators_averaged %>%
  filter(taxon == "Ambystoma cingulatum") %>%
  select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean)

For Proportion of maintained populations (indicator):

indicators_averaged %>%
  filter(taxon == "Ambystoma cingulatum") %>%
  select(taxon, country_assessment, multiassessment, indicator2, indicator2_mean)

Because we will use the averages to show a single value for multiasssessed taxa, we can keep only the first record for multiassessed taxa.

indicators_averaged_one<-indicators_averaged[!duplicated(cbind(indicators_averaged$taxon, indicators_averaged$country_assessment)), ]

General description of records and taxa assessed by country

Records by country, including taxa assessed more than once (see below for details on this)

ggplot(metadata, aes(x=country_assessment)) + 
  geom_bar(stat = "count") +
  xlab("") +
  ggtitle("Number of taxa assessed by country, including taxa assed more than once") +
  theme_light()

To explore what kind of taxa countries assessed regardless of if they assessed them once or more, we are going to use the subset indicators_averaged_one, were we averaged the indicators and kept only 1 record per assessment.

How many taxa were assessed (i.e. counting only once taxa that were assessed multiple times)?

# how many?
nrow(indicators_averaged_one)
## [1] 919

Plot taxa assessed excluding duplicates, i.e. the real number of taxa assessed:

p1<-ggplot(indicators_averaged_one, aes(x=country_assessment)) + 
  geom_bar(stat = "count") +
  xlab("") +
  ggtitle("Number of taxa assessed by country") +
  theme_light()
p1

Of which countries and taxonomic groups are the taxa that were assessed more than once?

p2<- indicators_averaged_one %>% # we use the _unique dataset so that multiassesed records are counted only once
        filter(multiassessment=="multiassessment") %>%

ggplot(aes(x=taxonomic_group, fill=country_assessment)) + 
  geom_bar(stat = "count") +
  theme(axis.text.x = element_text(angle = 45)) +
  labs(fill="Country") +
  xlab("") +
  ggtitle("Number of taxa assessed more than once") +
  theme_light()

p2

Heatmap of the taxa assessed by country (counting multiassessments only once)

We aimed to represent different taxonomic groups within animals (amphibians, birds, fishes, invertebrates, mammals and reptiles), plants (angiosperms, bryophytes, gymnosperms and pteridophytes), fungi and others (e.g. lichens). Order levels to represent those categories:

indicators_averaged_one$taxonomic_group<- factor(indicators_averaged_one$taxonomic_group,
                                                 levels = c("amphibian", "bird", "fish", "invertebrate", "mammal", "reptile", "angiosperm", "bryophyte", "gymnosperm", "pteridophytes", "fungus", "other"))

Make a heatmap

## Agregate data to get counts 

agg_data <- indicators_averaged_one %>% # we use the _unique dataset so that multiassesed records are counted only once
        filter(multiassessment!="multiassessment") %>%
        group_by(country_assessment, taxonomic_group) %>%
        summarize(count = n())
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# country names in desired order
agg_data$country_assessment <- factor(agg_data$country_assessment, 
                                      levels = rev(levels(agg_data$country_assessment)))
  
  
## Create a heat map
p_heat<- ggplot(agg_data, aes(x = taxonomic_group, y = country_assessment, fill = count)) + 
  geom_tile() + 
  scale_fill_gradient(low = "lightblue", high = "darkblue") +  # Adjust color scale as needed
  labs(x = "",
    y = "",
    fill = "Number of taxa"
  ) + 
  scale_x_discrete(position = "top") +
  theme_light() + 
  theme(axis.text.x = element_text(angle = 90, hjust = 0),
    legend.position = "right", text = element_text(size = 13),
    panel.grid.major = element_blank(), panel.grid.minor = element_blank(), # remove background
  )
p_heat

Supplementary Figure: Number of species and multiassessed species per country

plot_grid(p1 + ggtitle(""),
          p2 + ggtitle(""), ncol = 1, labels = c("a)", "b)"))

Population size data (Has Nc or Ne? what type of Nc?)

Supplementary Figure: Population size data availability by country

Countries have population size data (Nc or Ne) regardless of the taxonomic group. The last panel includes the entire dataset:

## Duplicate data with an additional column "facet"

df<-CreateAllFacet(metadata, "country_assessment")

# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US", "all"))

# Plot
ggplot(df, aes(x=taxonomic_group, fill=popsize_data)) + 
  geom_bar(stat = "count") +
  coord_flip() +
  facet_wrap(~facet, ncol = 5, scales="free_x") +
  scale_fill_manual(values=c("#2ca02c", "#1f77b4", "grey80"),
                    breaks=c("yes", "data_for_species", "insuff_data_species"),
                    labels=c("Population level", "Species or subspecies level", "Insufficient data")) +  labs(fill="Population size data availability",
       x="",
       y="Number of taxa (including records of taxa assessed more than once)") +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="top")

Population size data availability in the entire dataset:

ggplot(metadata, aes(x=taxonomic_group, fill=popsize_data)) + 
  geom_bar(stat = "count") +
  coord_flip() +
  scale_fill_manual(values=c("#1f77b4", "grey80", "#2ca02c"),
                    breaks=c(levels(as.factor(metadata$popsize_data))),
                                        labels=c("Species level or subspecies level", "Insufficient data", "Population level")) +
  labs(fill="Population size data availability",
       x="",
       y="Number of taxa (including records of taxa assessed more than once)") +
  theme_light() +
  theme(legend.position="right")

Species level yes/no table with percentages

df<- metadata %>%
     group_by(popsize_data) %>%
   summarise(n=n(),
             percentage = (n / nrow(metadata)) * 100)
   
kable(df, digits = 0)
popsize_data n percentage
data_for_species 130 13
insuff_data_species 216 22
yes 613 63
NA 7 1

Ne data yes or not? & Type of Nc data

Ne available by taxa? (species level)

p1<- metadata %>% 
  filter(!is.na(ne_pops_exists)) %>% 
  filter(ne_pops_exists!="other_genetic_info") %>%
    ggplot(aes(x=country_assessment, fill=ne_pops_exists)) + 
  geom_bar() +
scale_fill_manual(labels=c("no", "yes"),
                      breaks=c("no_genetic_data", "ne_available"),
                      values=c("#ff7f0e", "#2ca02c")) +
xlab("") +
ylab("Number of taxa") +
labs(fill="Ne available \n(from genetic data)")  +
theme_light() +
theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())

p1

Nc data available by taxa? (species level)

p2<-metadata %>%
  filter(!is.na(nc_pops_exists)) %>%
    ggplot(aes(x=country_assessment, fill=nc_pops_exists)) +
    geom_bar() +
    scale_fill_manual(values=c("#ff7f0e", "#2ca02c")) + 
    labs(fill="Nc available") +
    xlab("") +
    ylab("Number of taxa") +
    theme_light() +
    theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p2

What kind of Nc data? (dodge bars) This is at population level.

ind1_data %>%
  filter(!is.na(NcType)) %>%
  ggplot(aes(x=country_assessment, fill=NcType))+
  geom_bar(position = "dodge") +
  scale_fill_manual(labels=c("Point", "Range \nor qualitative", "Unknown"),
                      breaks=c("Nc_point", "Nc_range", "unknown"),
                      values=c("#0072B2", "#E69F00", "grey80")) +
  xlab("") +
  ylab("Number of populations") +
  labs(fill="Type of Nc data \nby population") +
  theme_light() +
  theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())

What kind of Nc data? (fill bars). This is at population level.

p3<-ind1_data %>%
  filter(!is.na(NcType)) %>%
  ggplot(aes(x=country_assessment, fill=NcType))+
  geom_bar(position = "fill", color="white") +
  scale_fill_manual(labels=c("Point", "Range \nor qualitative", "Unknown"),
                      breaks=c("Nc_point", "Nc_range", "unknown"),
                      values=c("#0072B2", "#E69F00", "grey80")) +
  xlab("") +
  ylab("Proportion of populations") +
  labs(fill="Type of Nc data \nby population") +
  theme_light() +
  theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p3

Data availability at the population level cosidering Ne and Nc combined. This plot shows where data came from for the Ne value used for estimating the indicator.

p4<-ind1_data %>%
  mutate(Ne_calculated_from = replace_na(Ne_calculated_from, "NA")) %>%
  ggplot(aes(x=country_assessment, fill=Ne_calculated_from))+
  geom_bar(position = "fill", color="white") +
  scale_fill_manual(labels=c("genetic data", "NcPoint ratio", "NcRange ratio", "NA"),
                      breaks=c("genetic data", "NcPoint ratio", "NcRange ratio", "NA"),
                      values=c("darkgreen", "#0072B2", "#E69F00", "grey80")) +
  xlab("") +
  scale_x_discrete(limits=rev) + 
  ylab("Proportion of populations") +
  labs(fill="Data used to estimate Ne") +
  theme_light() +
  coord_flip() +
  theme(text = element_text(size = 13), legend.position = "bottom", panel.border = element_blank())
p4

Supplementary Figure: Ne and Nc data availabiltiy by species

plot_grid(p1 + theme(legend.justification = c(0,.5)),  # legend.justification is used to aling legends
          p2 + theme(legend.justification = c(0,.5)),
          ncol=1, rel_widths = c(1,1,1,1), align = "v", labels=c("a)", "b)"), vjust = .7)  

Range of values for Ne and Nc data

Range of Ne values by taxonomic group, without possible outliers (Ne > 100000)

ind1_data %>%
  filter(Ne < 100000) %>%
  filter(!is.na(Ne))  %>%
  
  ggplot(aes(x=taxonomic_group, y=Ne)) +
  geom_boxplot(color="grey50") +
  geom_jitter(size=.5, width = 0.1, color="darkred") +
  xlab("") +
  theme_light() +
  theme(axis.text.x = element_text(angle = 45))

Check outliers

ind1_data %>% 
  filter(Ne > 100000) %>%
  select(country_assessment, name_assessor, taxon, taxonomic_group, Ne, NeLower, NeUpper, multiassessment, population)

Range of Nc values (actual data point provided) by taxonomic group. Without possible outliers.

ind1_data %>%
  filter(!is.na(NcPoint))  %>%
  filter(NcPoint < 10000000) %>%
 
  ggplot(aes(x=taxonomic_group, y=NcPoint)) +
  geom_boxplot(color="grey50") +
  geom_jitter(size=.5, width = 0.1, color="darkred") +
  xlab("") +
  theme_light() +
  theme(axis.text.x = element_text(angle = 45))

Check outliers

ind1_data %>% 
  filter(NcPoint > 10000000) %>% 
  select(country_assessment, name_assessor, taxon, taxonomic_group, population, NcPoint, NcLower, NcUpper, multiassessment, population)

Range of Ne values by taxonomic group from different sources. Without possible outliers.

ind1_data %>%
  filter(!is.na(Ne_combined))  %>%
  filter(Ne < 100000) %>%
 
  ggplot(aes(x=taxonomic_group, y=Ne_combined)) +
  geom_boxplot(color="grey50") +
  geom_jitter(size=.5, width = 0.1, color="darkred") +
  xlab("") +
  theme_light() +
  theme(axis.text.x = element_text(angle = 45))

Range of Ne values by taxonomic group from different sources. Zoom to Ne < 10,000

ind1_data %>%
  filter(!is.na(Ne_combined))  %>%
  filter(Ne < 10000) %>%
 
  ggplot(aes(x=taxonomic_group, y=Ne_combined)) +
  geom_boxplot(color="grey50") +
  geom_jitter(size=.5, width = 0.1, color="darkred") +
  xlab("") +
  theme_light() +
  theme(axis.text.x = element_text(angle = 45))

Missing data on extant and extinct populations

We have NA in Proportion of maintained populations (indicator) because in some cases the number of extinct populations is unknown, therefore the operation cannot be computed.

Counts

Total records with NA in extant populations:

sum(is.na(indicators_full$n_extant_populations))
## [1] 19

Taxa with NA in extant populations:

indicators_full %>%
  filter(is.na(n_extant_populations)) %>%
    select(country_assessment, taxonomic_group, taxon, n_extant_populations, n_extint_populations)

Total taxa with NA in extinct populations:

sum(is.na(indicators_full$n_extint_populations))
## [1] 417

Do taxa with NA for extant also have NA for extinct?

indicators_full$taxon[is.na(indicators_full$n_extant_populations)] %in% indicators_full$taxon[is.na(indicators_full$n_extint_populations)]
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE

So out of the 982, we have 417 records with NA in n_extinct and 19 records with NA in n_extant. Of them, 19 have NA in both n_extant and n_extinct.

Plot missing data extinct populations

p5<-indicators_full %>%
  ggplot(aes(x=country_assessment, fill=is.na(n_extint_populations))) +
  geom_bar() +
  scale_fill_manual(labels=c("number of populations known", "missing data"),
                    values=c("#2ca02c", "#ff7f0e")) + 
  labs(fill="Extinct populations") +
  xlab("") + ylab("Number of taxa") +
  theme_light() +
  theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p5

Supplementary Figure: Missing data in extinct populations by country

Missing data in number of extinct populations by method to define populations:

indicators_full %>%
  ggplot(aes(x=defined_populations_nicenames, fill=is.na(n_extint_populations)))+
  geom_bar() +
  coord_flip()+ 
  scale_fill_manual(labels=c("number of populations known", "missing data"),
                    values=c("#2ca02c", "#ff7f0e")) + 
  labs(fill="Extinct populations") +
  xlab("") + ylab("Number of taxa") +
  facet_wrap(country_assessment ~., nrow = 3, scales="free_x") +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="top")

Main Figure: Taxa by country and data availability to estimate Ne indciator (origion of data to estimate Ne) and PM indicator (missing data on pop extinction):

Distribution of Nc, Ne and types of Ne in a single figure with 3 panels, using count for a & b, and proportions for c:

# plot

plot_grid(p_heat + theme(legend.position = "right", legend.justification = c(0,.5)), # legend.justification aligns legends
          p5 + coord_flip() + 
            scale_x_discrete(limits=rev) +
            theme(legend.position = "right", legend.justification = c(0,.5)), 
          p4  + theme(legend.position = "right", legend.justification = c(0,.5)), 
          ncol = 1, labels = c("a)", "b)", "c)"), align = "v",
          rel_heights = c(1.3, 1, 1))

Main Figure: Method to define populations used by country and taxa (alluvial)

Reformat data

select(metadata, defined_populations_nicenames, defined_populations_simplified)
# reformat data
foralluvial<-metadata %>% group_by(country_assessment, defined_populations_nicenames, taxonomic_group) %>%
             summarise(n=n()) 
## `summarise()` has grouped output by 'country_assessment',
## 'defined_populations_nicenames'. You can override using the `.groups` argument.
# define colors
my_cols<- simplifiedmethods_colors

# we need a vector of colors by country for each row of the dataset, so:
methodspop<-as.factor(foralluvial$defined_populations_nicenames)
levels(methodspop)<-my_cols
methodspop<-as.vector(methodspop)
head(methodspop)
## [1] "#668cd1" "#668cd1" "#668cd1" "#668cd1" "#668cd1" "#45c097"

Plot

# plot
alluvial(foralluvial[,1:3], freq = foralluvial$n,
         col=methodspop, 
         blocks=FALSE,
         gap.width = 0.5,
         cex=.8, 
         xw = 0.1,
         cw = 0.2,
         border = NA,
         alpha = .7)

Exploratory plots for the association of distribution range (restricted vs wide) on the indicators

All the following plots and analyses consider the average of multiassessed species (variable _mean), so that they are shown only once.

To have nicer looking plots, change “wide_ranging” for “wide ranging”:

indicators_averaged_one$species_range<-gsub("wide_ranging", "wide ranging", indicators_averaged_one$species_range)

Indicator 1 (Ne>5000)

Plot Indicator 1 by type of range in the entire dataset. Filtering NA in species range:

# get sample size by desired category
sample_size <- indicators_averaged_one  %>%
                    filter(!is.na(indicator1_mean)) %>% 
                    filter(!is.na(species_range)) %>% 
                    group_by(species_range) %>% summarize(num=n())

# plot
p1<-indicators_averaged_one %>% 
    filter(!is.na(indicator1_mean)) %>% 
    filter(!is.na(species_range)) %>% 
  # add sampling size 
  left_join(sample_size) %>% 
  mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%

  # plot
  ggplot(aes(x=myaxis, y=indicator1_mean , fill=species_range)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of populations within species with Ne>500") +
      coord_flip() +
      scale_fill_manual(breaks=c("wide ranging", "restricted", "unknown"),
                       labels=c("wide ranging", "restricted", "unknown"),
                       values=c("#00BFC4", "#F8766D", "grey80")) +
      theme_light() +
      theme(panel.border = element_blank(), legend.position="none", text= element_text(size=20))
## Joining, by = "species_range"
p1

Supplementary Figure: Plot Ne Indicator by country and type of range. Remove “unknown” and NA for better visualization.

### Duplicate dataframe to have a column with "all data" for faceting
df<-CreateAllFacet(indicators_averaged_one, "country_assessment")

# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US", "all"))

## plot
df  %>% 
  # filter out "unknown" range
  filter(species_range !="unknown") %>% 
  
# plot
ggplot(aes(x=species_range, y=indicator1_mean , fill=species_range)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of populations within species with Ne>500") +
      coord_flip() +
      scale_x_discrete(breaks=c("wide ranging", "restricted"),
                        labels=c("wide ranging", "restricted")) +
      theme_light() +
      theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15)) +
      facet_wrap(~facet, ncol = 5) +
      theme(panel.spacing = unit(1.5, "lines"))  
## Warning: Removed 658 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 658 rows containing missing values (`geom_point()`).

Indicator 2 (mantained populations)

Plot Indicator 2 by type of range in the entire dataset. Filtering NA in species range:

# get sample size by desired category
sample_size <- indicators_averaged_one  %>%
                    filter(!is.na(indicator2_mean)) %>% 
                    filter(!is.na(species_range)) %>% 
                    group_by(species_range) %>% summarize(num=n())

# plot
p2<-indicators_averaged_one %>% 
    filter(!is.na(indicator2_mean)) %>% 
    filter(!is.na(species_range)) %>% 
  # add sampling size 
  left_join(sample_size) %>% 
  mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%

  # plot
  ggplot(aes(x=myaxis, y=indicator2_mean , fill=species_range)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of maintained populations within species") +
      coord_flip() +
      scale_fill_manual(breaks=c("wide ranging", "restricted", "unknown"),
                       labels=c("wide ranging", "restricted", "unknown"),
                       values=c("#00BFC4", "#F8766D", "grey80")) +
      theme_light() +
      theme(panel.border = element_blank(), legend.position="none", text= element_text(size=20))
## Joining, by = "species_range"
p2

Plot Indicator 2 by country and type of range. We remove NA and unknown for better visualization.

### Duplicate dataframe to have a column with "all data" for faceting
df<-CreateAllFacet(indicators_averaged_one, "country_assessment")

# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US", "all"))

## plot
df  %>% 
  # filter out "unknown" range
  filter(species_range !="unknown") %>% 
  
# plot
ggplot(aes(x=species_range, y=indicator2_mean , fill=species_range)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of maintained populations within species") +
      coord_flip() +
      theme_light() +
      theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15)) +
      facet_wrap(~facet, ncol = 5) +
      theme(panel.spacing = unit(1.5, "lines"))  
## Warning: Removed 742 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Removed 742 rows containing missing values (`geom_point()`).

Single plot PM and Ne indicators by range type

plot_grid(p1, p2,  ncol=1, align = "v", labels=c("a)", "b)"))  

Sampling size of different variables useful to interpret the statistical models run below

The plots and tables below are meant to visualize the sampling size and data distribution of some of the variables used in the models below. Data is a subset filtering outliers (>500 populations) and using the simplified methods (see above). Multiassessed species are considered independently (each assessment is a data point).

Supplementary Figure: Number of maintained populations by country and method

Number of maintained populations by country and method is useful to interpret the models that would be run below.

indicators_full %>% 
  filter(n_extant_populations<500) %>% # filter outliers
  # order countries vertically by similar number of pops
  mutate(country_assessment = factor(country_assessment, 
                                     levels=c("Colombia", "Australia", "Belgium",
                                               "Mexico", "France", "US", 
                                               "S. Africa", "Japan", "Sweden"))) %>%
  ggplot(aes(x=defined_populations_nicenames, y=n_extant_populations, 
             fill=defined_populations_nicenames, color=defined_populations_nicenames)) +
          geom_boxplot() +
          geom_jitter(size=.3, width = 0.1, color="black") +
  coord_flip() +
  facet_wrap(country_assessment ~ ., nrow=3, scales="free_x") +
  xlab("")  +
  ylab("Number of maintained populations") +
  scale_fill_manual(values=alpha(simplifiedmethods_colors, .3),
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_x_discrete(limits=rev) +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="none",
        text = element_text(size = 15)) 

Ne values

ind1_data %>% 
  filter(Ne_combined < 100000) %>% # filter outliers

  ggplot(aes(x=defined_populations_simplified, y=Ne_combined, 
             color=Ne_calculated_from)) +
          geom_boxplot(position = "dodge") +
          geom_jitter(position = position_dodge(width = 0.75)) +  
          facet_wrap(country_assessment ~ ., nrow=3) +
          coord_flip() +
          theme_light()

Zoom to Ne 500

ind1_data %>% 
  filter(Ne_combined < 100000) %>% # filter outliers

  ggplot(aes(x=defined_populations_simplified, y=Ne_combined, 
             color=Ne_calculated_from)) +
          ylim(0,2000)+
          geom_boxplot(position = "dodge") +
          geom_jitter(position = position_dodge(width = 0.75)) +  
          facet_wrap(country_assessment ~ ., nrow=3) +
          coord_flip() +
          theme_light()
## Warning: Removed 104 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 104 rows containing missing values (`geom_point()`).

Summary table for sampling size by method and source of Ne:

x<- ind1_data %>% 
  filter(!is.na(Ne_calculated_from)) %>% 
  group_by(defined_populations_simplified, Ne_calculated_from) %>%
                summarise(n=n())
## `summarise()` has grouped output by 'defined_populations_simplified'. You can
## override using the `.groups` argument.
kable(x)
defined_populations_simplified Ne_calculated_from n
dispersal_buffer genetic data 10
dispersal_buffer NcPoint ratio 226
dispersal_buffer NcRange ratio 1114
eco_biogeo_proxies genetic data 8
eco_biogeo_proxies NcPoint ratio 55
eco_biogeo_proxies NcRange ratio 82
genetic_clusters genetic data 43
genetic_clusters NcPoint ratio 32
genetic_clusters NcRange ratio 59
genetic_clusters eco_biogeo_proxies genetic data 4
genetic_clusters eco_biogeo_proxies NcPoint ratio 3
genetic_clusters eco_biogeo_proxies NcRange ratio 18
genetic_clusters geographic_boundaries genetic data 44
genetic_clusters geographic_boundaries NcPoint ratio 34
genetic_clusters geographic_boundaries NcRange ratio 83
geographic_boundaries genetic data 142
geographic_boundaries NcPoint ratio 478
geographic_boundaries NcRange ratio 594
geographic_boundaries eco_biogeo_proxies genetic data 8
geographic_boundaries eco_biogeo_proxies NcPoint ratio 68
geographic_boundaries eco_biogeo_proxies NcRange ratio 200
geographic_boundaries management_units genetic data 29
geographic_boundaries management_units NcPoint ratio 189
geographic_boundaries management_units NcRange ratio 22
management_units NcPoint ratio 48
management_units NcRange ratio 76
other NcPoint ratio 3
other NcRange ratio 14
other_combinations genetic data 61
other_combinations NcPoint ratio 130
other_combinations NcRange ratio 712

Same as above but adding country:

x<- ind1_data %>% 
  filter(!is.na(Ne_calculated_from)) %>% 
  group_by(country_assessment, defined_populations_simplified, Ne_calculated_from) %>%
                summarise(n=n())
## `summarise()` has grouped output by 'country_assessment',
## 'defined_populations_simplified'. You can override using the `.groups`
## argument.
kable(x)
country_assessment defined_populations_simplified Ne_calculated_from n
Australia genetic_clusters genetic data 7
Australia genetic_clusters NcPoint ratio 8
Australia genetic_clusters geographic_boundaries genetic data 15
Australia genetic_clusters geographic_boundaries NcPoint ratio 13
Australia genetic_clusters geographic_boundaries NcRange ratio 7
Australia geographic_boundaries genetic data 15
Australia geographic_boundaries NcPoint ratio 76
Australia geographic_boundaries NcRange ratio 59
Australia geographic_boundaries management_units NcPoint ratio 8
Australia geographic_boundaries management_units NcRange ratio 3
Australia management_units NcRange ratio 3
Australia other_combinations NcRange ratio 4
Belgium dispersal_buffer genetic data 10
Belgium dispersal_buffer NcPoint ratio 8
Belgium dispersal_buffer NcRange ratio 844
Belgium genetic_clusters NcRange ratio 7
Belgium other_combinations genetic data 40
Belgium other_combinations NcPoint ratio 2
Belgium other_combinations NcRange ratio 379
Colombia geographic_boundaries eco_biogeo_proxies NcPoint ratio 4
Colombia geographic_boundaries eco_biogeo_proxies NcRange ratio 46
Colombia other_combinations NcRange ratio 1
France eco_biogeo_proxies genetic data 7
France genetic_clusters genetic data 3
France genetic_clusters NcRange ratio 1
France genetic_clusters eco_biogeo_proxies genetic data 3
France genetic_clusters eco_biogeo_proxies NcPoint ratio 1
France genetic_clusters geographic_boundaries genetic data 6
France genetic_clusters geographic_boundaries NcPoint ratio 6
France genetic_clusters geographic_boundaries NcRange ratio 7
France geographic_boundaries NcPoint ratio 12
France geographic_boundaries NcRange ratio 22
France geographic_boundaries eco_biogeo_proxies NcPoint ratio 1
France geographic_boundaries eco_biogeo_proxies NcRange ratio 2
France geographic_boundaries management_units genetic data 15
France geographic_boundaries management_units NcPoint ratio 38
France geographic_boundaries management_units NcRange ratio 12
France management_units NcPoint ratio 10
France management_units NcRange ratio 8
France other_combinations genetic data 3
France other_combinations NcPoint ratio 20
France other_combinations NcRange ratio 10
Japan dispersal_buffer NcPoint ratio 214
Japan dispersal_buffer NcRange ratio 232
Japan geographic_boundaries NcPoint ratio 1
Japan geographic_boundaries NcRange ratio 5
Mexico genetic_clusters genetic data 13
Mexico genetic_clusters NcPoint ratio 15
Mexico genetic_clusters NcRange ratio 24
Mexico genetic_clusters eco_biogeo_proxies genetic data 1
Mexico genetic_clusters eco_biogeo_proxies NcRange ratio 17
Mexico genetic_clusters geographic_boundaries genetic data 2
Mexico genetic_clusters geographic_boundaries NcPoint ratio 6
Mexico genetic_clusters geographic_boundaries NcRange ratio 15
Mexico geographic_boundaries NcRange ratio 75
Mexico other NcRange ratio 1
Mexico other_combinations genetic data 4
Mexico other_combinations NcRange ratio 26
S. Africa genetic_clusters genetic data 12
S. Africa genetic_clusters NcPoint ratio 3
S. Africa genetic_clusters NcRange ratio 6
S. Africa genetic_clusters eco_biogeo_proxies NcPoint ratio 2
S. Africa genetic_clusters eco_biogeo_proxies NcRange ratio 1
S. Africa genetic_clusters geographic_boundaries genetic data 2
S. Africa genetic_clusters geographic_boundaries NcPoint ratio 2
S. Africa genetic_clusters geographic_boundaries NcRange ratio 11
S. Africa geographic_boundaries genetic data 2
S. Africa geographic_boundaries NcPoint ratio 28
S. Africa geographic_boundaries NcRange ratio 21
S. Africa geographic_boundaries management_units NcRange ratio 1
S. Africa management_units NcPoint ratio 1
S. Africa other NcRange ratio 1
S. Africa other_combinations genetic data 2
S. Africa other_combinations NcPoint ratio 8
S. Africa other_combinations NcRange ratio 4
Sweden dispersal_buffer NcPoint ratio 4
Sweden dispersal_buffer NcRange ratio 38
Sweden eco_biogeo_proxies NcRange ratio 26
Sweden genetic_clusters genetic data 7
Sweden genetic_clusters NcPoint ratio 3
Sweden genetic_clusters NcRange ratio 11
Sweden genetic_clusters geographic_boundaries genetic data 19
Sweden genetic_clusters geographic_boundaries NcPoint ratio 6
Sweden genetic_clusters geographic_boundaries NcRange ratio 41
Sweden geographic_boundaries genetic data 2
Sweden geographic_boundaries NcPoint ratio 67
Sweden geographic_boundaries NcRange ratio 168
Sweden geographic_boundaries management_units NcPoint ratio 3
Sweden geographic_boundaries management_units NcRange ratio 5
Sweden management_units NcPoint ratio 12
Sweden other NcRange ratio 10
Sweden other_combinations genetic data 3
Sweden other_combinations NcPoint ratio 7
Sweden other_combinations NcRange ratio 265
US eco_biogeo_proxies genetic data 1
US eco_biogeo_proxies NcPoint ratio 55
US eco_biogeo_proxies NcRange ratio 56
US genetic_clusters genetic data 1
US genetic_clusters NcPoint ratio 3
US genetic_clusters NcRange ratio 10
US genetic_clusters geographic_boundaries NcPoint ratio 1
US genetic_clusters geographic_boundaries NcRange ratio 2
US geographic_boundaries genetic data 123
US geographic_boundaries NcPoint ratio 294
US geographic_boundaries NcRange ratio 244
US geographic_boundaries eco_biogeo_proxies genetic data 8
US geographic_boundaries eco_biogeo_proxies NcPoint ratio 63
US geographic_boundaries eco_biogeo_proxies NcRange ratio 152
US geographic_boundaries management_units genetic data 14
US geographic_boundaries management_units NcPoint ratio 140
US geographic_boundaries management_units NcRange ratio 1
US management_units NcPoint ratio 25
US management_units NcRange ratio 65
US other NcPoint ratio 3
US other NcRange ratio 2
US other_combinations genetic data 9
US other_combinations NcPoint ratio 93
US other_combinations NcRange ratio 23

Distribution type (wide / restricted)

x<-indicators_full %>% filter(species_range !="unknown") %>%
                    group_by(defined_populations_nicenames, species_range) %>% 
                    summarise(n=n())
## `summarise()` has grouped output by 'defined_populations_nicenames'. You can
## override using the `.groups` argument.
kable(x)
defined_populations_nicenames species_range n
dispersal buffer restricted 54
dispersal buffer wide_ranging 99
eco- biogeographic proxies restricted 26
eco- biogeographic proxies wide_ranging 17
genetic clusters restricted 46
genetic clusters wide_ranging 60
genetic clusters & eco- biogeographic proxies restricted 8
genetic clusters & eco- biogeographic proxies wide_ranging 18
genetic clusters & geographic boundaries restricted 34
genetic clusters & geographic boundaries wide_ranging 34
geographic boundaries restricted 202
geographic boundaries wide_ranging 71
geographic boundaries & eco- biogeographic proxies restricted 74
geographic boundaries & eco- biogeographic proxies wide_ranging 17
geographic boundaries & management units restricted 14
geographic boundaries & management units wide_ranging 10
management units restricted 14
management units wide_ranging 13
other restricted 13
other wide_ranging 4
other combinations restricted 45
other combinations wide_ranging 64

Supplementary Figure: number of mantained populations by method to define populations and range type

Facet by range type:

p<-indicators_full %>% 
  filter(!is.na(n_extant_populations)) %>% 
  filter(n_extant_populations<500) %>%
  filter(species_range !="unknown") %>%
  filter(!is.na(species_range)) %>%
  
  ggplot(aes(x=defined_populations_nicenames, y=n_extant_populations)) +
          geom_boxplot(aes(color=defined_populations_nicenames,
                           fill=defined_populations_nicenames)) + 
    xlab("") + ylab("Number of maintained populations") +
  coord_flip() +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="none",
        plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots
  scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  facet_wrap("species_range", ncol=1) + 
  geom_jitter(size=.4, width = 0.1, color="black") +
  scale_x_discrete(limits=rev)

p

Statistical models: test for associations between method used to define populations / range type on the number of populations and the indicator values

The analyses and plots below us a subset of data filtering outliers (>500 populations) and using the simplified methods (see above). Multiassessed species are considered independently (each assessment is a data point).

(a) Does the number of maintained pops vary with method used?

First we tested whether the different methods reported in this study were associated with varying numbers of populations obtained. For this analysis, we also controlled for range type, as we expect species with wider ranges to plausibly have more populations than species with narrower ranges.

Plot number of populations by method.

# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
                    filter(!is.na(n_extant_populations)) %>% 
                    filter(n_extant_populations<500) %>%
                    group_by(defined_populations_nicenames) %>% summarize(num=n())

# custom axis
## new dataframe
df<-indicators_full %>% 
  filter(!is.na(n_extant_populations)) %>% 
  filter(n_extant_populations<500) %>%
    # add sampling size 
  left_join(sample_size) %>%
  mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
  mutate(myaxis = factor(myaxis, 
                  levels=levels(as.factor(myaxis))[c(1,12,2:11,13)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
# plot for number of pops
  pa<- df %>%
  ggplot(aes(x=myaxis, y=n_extant_populations, color=defined_populations_nicenames,
                                               fill=defined_populations_nicenames)) +
          geom_boxplot() + xlab("") + ylab("Number of maintained populations") +
          geom_jitter(size=.4, width = 0.1, color="black") +
  coord_flip() +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="none",
        plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots
  scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_x_discrete(limits=rev) +  
  theme(text = element_text(size = 13))
pa

Prepare data for model (remove outliers, “unknown” category and NA in desired variable) and check n:

# remove missing data 
data_for_model<-indicators_full %>% 
                      filter(!is.na(n_extant_populations)) %>%
                      filter(species_range !="unknown") %>% # we remove "unknonw" because its n is too low, thus unbalancing the model
                      filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots

# check n per method
table(data_for_model$defined_populations_simplified)
## 
##                         dispersal_buffer 
##                                      149 
##                       eco_biogeo_proxies 
##                                       43 
##                         genetic_clusters 
##                                      104 
##      genetic_clusters eco_biogeo_proxies 
##                                       25 
##   genetic_clusters geographic_boundaries 
##                                       68 
##                    geographic_boundaries 
##                                      269 
## geographic_boundaries eco_biogeo_proxies 
##                                       90 
##   geographic_boundaries management_units 
##                                       24 
##                         management_units 
##                                       27 
##                                    other 
##                                       14 
##                       other_combinations 
##                                      106
# total n
nrow(data_for_model)
## [1] 919
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
                                                       ref="geographic_boundaries")

# make sure specis range is a factor
data_for_model$species_range<-as.factor(data_for_model$species_range)

Run model asking: Does the number of maintained pops vary with method and range?

m.a1<-glmer(data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified + data_for_model$species_range + (1|data_for_model$country_assessment), family ="poisson")

summary(m.a1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: 
## data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified +  
##     data_for_model$species_range + (1 | data_for_model$country_assessment)
## 
##      AIC      BIC   logLik deviance df.resid 
##  25019.5  25082.2 -12496.8  24993.5      906 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -8.057 -2.887 -1.133  0.695 89.652 
## 
## Random effects:
##  Groups                            Name        Variance Std.Dev.
##  data_for_model$country_assessment (Intercept) 0.9191   0.9587  
## Number of obs: 919, groups:  data_for_model$country_assessment, 9
## 
## Fixed effects:
##                                                                                       Estimate
## (Intercept)                                                                            1.97560
## data_for_model$defined_populations_simplifieddispersal_buffer                         -1.26073
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                       -0.13462
## data_for_model$defined_populations_simplifiedgenetic_clusters                         -1.55617
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      -1.97499
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.04942
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.19851
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units   -0.13256
## data_for_model$defined_populations_simplifiedmanagement_units                         -0.84557
## data_for_model$defined_populations_simplifiedother                                    -1.30402
## data_for_model$defined_populations_simplifiedother_combinations                       -0.77247
## data_for_model$species_rangewide_ranging                                               1.09745
##                                                                                       Std. Error
## (Intercept)                                                                              0.32026
## data_for_model$defined_populations_simplifieddispersal_buffer                            0.05067
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                          0.03225
## data_for_model$defined_populations_simplifiedgenetic_clusters                            0.06222
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies         0.08953
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries      0.03548
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    0.03936
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units      0.05084
## data_for_model$defined_populations_simplifiedmanagement_units                            0.05480
## data_for_model$defined_populations_simplifiedother                                       0.11140
## data_for_model$defined_populations_simplifiedother_combinations                          0.03493
## data_for_model$species_rangewide_ranging                                                 0.01957
##                                                                                       z value
## (Intercept)                                                                             6.169
## data_for_model$defined_populations_simplifieddispersal_buffer                         -24.879
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                        -4.174
## data_for_model$defined_populations_simplifiedgenetic_clusters                         -25.009
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      -22.060
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries     1.393
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  -5.044
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units    -2.607
## data_for_model$defined_populations_simplifiedmanagement_units                         -15.431
## data_for_model$defined_populations_simplifiedother                                    -11.705
## data_for_model$defined_populations_simplifiedother_combinations                       -22.114
## data_for_model$species_rangewide_ranging                                               56.074
##                                                                                       Pr(>|z|)
## (Intercept)                                                                           6.88e-10
## data_for_model$defined_populations_simplifieddispersal_buffer                          < 2e-16
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                       2.99e-05
## data_for_model$defined_populations_simplifiedgenetic_clusters                          < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.16363
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 4.56e-07
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units    0.00913
## data_for_model$defined_populations_simplifiedmanagement_units                          < 2e-16
## data_for_model$defined_populations_simplifiedother                                     < 2e-16
## data_for_model$defined_populations_simplifiedother_combinations                        < 2e-16
## data_for_model$species_rangewide_ranging                                               < 2e-16
##                                                                                          
## (Intercept)                                                                           ***
## data_for_model$defined_populations_simplifieddispersal_buffer                         ***
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                       ***
## data_for_model$defined_populations_simplifiedgenetic_clusters                         ***
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      ***
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries      
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies ***
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units   ** 
## data_for_model$defined_populations_simplifiedmanagement_units                         ***
## data_for_model$defined_populations_simplifiedother                                    ***
## data_for_model$defined_populations_simplifiedother_combinations                       ***
## data_for_model$species_rangewide_ranging                                              ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                                    (Intr) dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_mdl$dfnd_ppltns_smplfdd_     -0.034                               
## dt_fr_$____                        -0.012  0.093                        
## dt_fr_mdl$dfnd_ppltns_smplfdg_     -0.014  0.096                        
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ -0.004  0.039                        
## dt_f_$___g_                        -0.022  0.117                        
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ -0.020  0.073                        
## dt_f_$___m_                        -0.012  0.056                        
## dt_fr_mdl$dfnd_ppltns_smplfdm_     -0.006  0.063                        
## dt_fr_mdl$d__                      -0.005  0.031                        
## dt_fr_mdl$dfnd_ppltns_smplfdt_     -0.027  0.419                        
## dt_fr_mdl$s__                      -0.029 -0.090                        
##                                    d__$____ dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdd_                                            
## dt_fr_$____                                                               
## dt_fr_mdl$dfnd_ppltns_smplfdg_      0.096                                 
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__  0.099    0.045                        
## dt_f_$___g_                         0.158    0.152                        
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__  0.223    0.077                        
## dt_f_$___m_                         0.131    0.067                        
## dt_fr_mdl$dfnd_ppltns_smplfdm_      0.152    0.064                        
## dt_fr_mdl$d__                       0.062    0.030                        
## dt_fr_mdl$dfnd_ppltns_smplfdt_      0.178    0.131                        
## dt_fr_mdl$s__                      -0.102   -0.082                        
##                                    dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ d__$_g
## dt_fr_mdl$dfnd_ppltns_smplfdd_                                              
## dt_fr_$____                                                                 
## dt_fr_mdl$dfnd_ppltns_smplfdg_                                              
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__                                          
## dt_f_$___g_                         0.068                                   
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__  0.085                              0.130
## dt_f_$___m_                         0.047                              0.109
## dt_fr_mdl$dfnd_ppltns_smplfdm_      0.060                              0.103
## dt_fr_mdl$d__                       0.023                              0.053
## dt_fr_mdl$dfnd_ppltns_smplfdt_      0.073                              0.196
## dt_fr_mdl$s__                      -0.085                             -0.077
##                                    dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ d__$_m
## dt_fr_mdl$dfnd_ppltns_smplfdd_                                              
## dt_fr_$____                                                                 
## dt_fr_mdl$dfnd_ppltns_smplfdg_                                              
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__                                          
## dt_f_$___g_                                                                 
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__                                          
## dt_f_$___m_                         0.113                                   
## dt_fr_mdl$dfnd_ppltns_smplfdm_      0.130                              0.077
## dt_fr_mdl$d__                       0.050                              0.034
## dt_fr_mdl$dfnd_ppltns_smplfdt_      0.148                              0.106
## dt_fr_mdl$s__                      -0.101                             -0.005
##                                    dt_fr_mdl$dfnd_ppltns_smplfdm_ dt_fr_mdl$d__
## dt_fr_mdl$dfnd_ppltns_smplfdd_                                                 
## dt_fr_$____                                                                    
## dt_fr_mdl$dfnd_ppltns_smplfdg_                                                 
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__                                             
## dt_f_$___g_                                                                    
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__                                             
## dt_f_$___m_                                                                    
## dt_fr_mdl$dfnd_ppltns_smplfdm_                                                 
## dt_fr_mdl$d__                       0.036                                      
## dt_fr_mdl$dfnd_ppltns_smplfdt_      0.114                          0.053       
## dt_fr_mdl$s__                      -0.118                         -0.010       
##                                    dt_fr_mdl$dfnd_ppltns_smplfdt_
## dt_fr_mdl$dfnd_ppltns_smplfdd_                                   
## dt_fr_$____                                                      
## dt_fr_mdl$dfnd_ppltns_smplfdg_                                   
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__                               
## dt_f_$___g_                                                      
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__                               
## dt_f_$___m_                                                      
## dt_fr_mdl$dfnd_ppltns_smplfdm_                                   
## dt_fr_mdl$d__                                                    
## dt_fr_mdl$dfnd_ppltns_smplfdt_                                   
## dt_fr_mdl$s__                      -0.110

Considering the role of method was so important for determining the number of populations, we also tested whether this effect remained after removing “wide-ranging” from the model. The objective here was to test whether method alone would also produce varying numbers of populations, for example if species rangedness were unknown.

Does the number of maintained pops vary with method used? (does method still influence number of populations if we exclude range type from the model):

m.a2<-glmer(data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified + 
            (1|data_for_model$country_assessment), family ="poisson")

See results:

summary(m.a2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: 
## data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified +  
##     (1 | data_for_model$country_assessment)
## 
##      AIC      BIC   logLik deviance df.resid 
##  28258.3  28316.2 -14117.1  28234.3      907 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -6.326 -2.953 -1.239  0.283 74.519 
## 
## Random effects:
##  Groups                            Name        Variance Std.Dev.
##  data_for_model$country_assessment (Intercept) 1.041    1.02    
## Number of obs: 919, groups:  data_for_model$country_assessment, 9
## 
## Fixed effects:
##                                                                                       Estimate
## (Intercept)                                                                            2.37273
## data_for_model$defined_populations_simplifieddispersal_buffer                         -0.98301
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                        0.05544
## data_for_model$defined_populations_simplifiedgenetic_clusters                         -1.25178
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      -1.48212
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.20082
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  0.03979
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units   -0.11256
## data_for_model$defined_populations_simplifiedmanagement_units                         -0.44813
## data_for_model$defined_populations_simplifiedother                                    -1.24179
## data_for_model$defined_populations_simplifiedother_combinations                       -0.54510
##                                                                                       Std. Error
## (Intercept)                                                                              0.34082
## data_for_model$defined_populations_simplifieddispersal_buffer                            0.05291
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                          0.03232
## data_for_model$defined_populations_simplifiedgenetic_clusters                            0.06198
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies         0.08929
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries      0.03468
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    0.03952
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units      0.05066
## data_for_model$defined_populations_simplifiedmanagement_units                            0.05449
## data_for_model$defined_populations_simplifiedother                                       0.11149
## data_for_model$defined_populations_simplifiedother_combinations                          0.03467
##                                                                                       z value
## (Intercept)                                                                             6.962
## data_for_model$defined_populations_simplifieddispersal_buffer                         -18.578
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                         1.715
## data_for_model$defined_populations_simplifiedgenetic_clusters                         -20.197
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      -16.599
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries     5.790
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   1.007
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units    -2.222
## data_for_model$defined_populations_simplifiedmanagement_units                          -8.223
## data_for_model$defined_populations_simplifiedother                                    -11.138
## data_for_model$defined_populations_simplifiedother_combinations                       -15.722
##                                                                                       Pr(>|z|)
## (Intercept)                                                                           3.36e-12
## data_for_model$defined_populations_simplifieddispersal_buffer                          < 2e-16
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                         0.0863
## data_for_model$defined_populations_simplifiedgenetic_clusters                          < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries   7.02e-09
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   0.3141
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units     0.0263
## data_for_model$defined_populations_simplifiedmanagement_units                          < 2e-16
## data_for_model$defined_populations_simplifiedother                                     < 2e-16
## data_for_model$defined_populations_simplifiedother_combinations                        < 2e-16
##                                                                                          
## (Intercept)                                                                           ***
## data_for_model$defined_populations_simplifieddispersal_buffer                         ***
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                       .  
## data_for_model$defined_populations_simplifiedgenetic_clusters                         ***
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      ***
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries   ***
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units   *  
## data_for_model$defined_populations_simplifiedmanagement_units                         ***
## data_for_model$defined_populations_simplifiedother                                    ***
## data_for_model$defined_populations_simplifiedother_combinations                       ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                                    (Intr) dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_mdl$dfnd_ppltns_smplfdd_     -0.036                               
## dt_fr_$____                        -0.015  0.088                        
## dt_fr_mdl$dfnd_ppltns_smplfdg_     -0.015  0.083                        
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ -0.006  0.034                        
## dt_f_$___g_                        -0.021  0.096                        
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ -0.023  0.069                        
## dt_f_$___m_                        -0.011  0.058                        
## dt_fr_mdl$dfnd_ppltns_smplfdm_     -0.010  0.054                        
## dt_fr_md$__                        -0.005  0.028                        
## dt_fr_mdl$dfnd_ppltns_smplfdt_     -0.028  0.414                        
##                                    d__$____ dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdd_                                            
## dt_fr_$____                                                               
## dt_fr_mdl$dfnd_ppltns_smplfdg_      0.092                                 
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__  0.094    0.042                        
## dt_f_$___g_                         0.173    0.131                        
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__  0.224    0.073                        
## dt_f_$___m_                         0.140    0.060                        
## dt_fr_mdl$dfnd_ppltns_smplfdm_      0.145    0.057                        
## dt_fr_md$__                         0.069    0.031                        
## dt_fr_mdl$dfnd_ppltns_smplfdt_      0.185    0.120                        
##                                    dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ d__$_g
## dt_fr_mdl$dfnd_ppltns_smplfdd_                                              
## dt_fr_$____                                                                 
## dt_fr_mdl$dfnd_ppltns_smplfdg_                                              
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__                                          
## dt_f_$___g_                         0.069                                   
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__  0.080                              0.141
## dt_f_$___m_                         0.050                              0.113
## dt_fr_mdl$dfnd_ppltns_smplfdm_      0.052                              0.106
## dt_fr_md$__                         0.026                              0.054
## dt_fr_mdl$dfnd_ppltns_smplfdt_      0.072                              0.179
##                                    dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ d__$_m
## dt_fr_mdl$dfnd_ppltns_smplfdd_                                              
## dt_fr_$____                                                                 
## dt_fr_mdl$dfnd_ppltns_smplfdg_                                              
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__                                          
## dt_f_$___g_                                                                 
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__                                          
## dt_f_$___m_                         0.118                                   
## dt_fr_mdl$dfnd_ppltns_smplfdm_      0.123                              0.081
## dt_fr_md$__                         0.058                              0.038
## dt_fr_mdl$dfnd_ppltns_smplfdt_      0.153                              0.113
##                                    dt_fr_mdl$dfnd_ppltns_smplfdm_ dt__$__
## dt_fr_mdl$dfnd_ppltns_smplfdd_                                           
## dt_fr_$____                                                              
## dt_fr_mdl$dfnd_ppltns_smplfdg_                                           
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__                                       
## dt_f_$___g_                                                              
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__                                       
## dt_f_$___m_                                                              
## dt_fr_mdl$dfnd_ppltns_smplfdm_                                           
## dt_fr_md$__                         0.039                                
## dt_fr_mdl$dfnd_ppltns_smplfdt_      0.111                          0.055

Extending from this result, we also tested whether species range alone is an important predictor of the number of extant populations, as species range is determined by the geographic spread of the species, but not necessarily fragmentation

Does the number of maintained pops vary with range?

m.a3<-glmer(n_extant_populations ~ species_range + (1|country_assessment), family = "poisson", data = data_for_model)

summary(m.a3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: n_extant_populations ~ species_range + (1 | country_assessment)
##    Data: data_for_model
## 
##      AIC      BIC   logLik deviance df.resid 
##  27562.5  27576.9 -13778.2  27556.5      916 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -6.726 -2.937 -1.259 -0.054 93.983 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.713    0.8444  
## Number of obs: 919, groups:  country_assessment, 9
## 
## Fixed effects:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.55158    0.28212    5.50 3.81e-08 ***
## species_rangewide_ranging  0.90544    0.01912   47.35  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## spcs_rngwd_ -0.041

(b) Does the proportion of maintained populations (indicator2) vary with method used to define populations?

Our next goal was to determine whether study design (i.e. clustering method to define populations) and/or species-level variables (number of populations, range type) appropriately were associated with the measurement of the genetic indicators.

Plot PM indicator by method to define populations:

# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
                    filter(!is.na(indicator2)) %>% 
                    filter(n_extant_populations<500) %>% 
                    group_by(defined_populations_nicenames) %>% summarize(num=n())

# custom axis
## new dataframe
df<-indicators_full %>% 
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator2)) %>% 
    # add sampling size 
  left_join(sample_size) %>%
  mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
  mutate(myaxis = factor(myaxis, 
                  levels=levels(as.factor(myaxis))[c(1,12,2:11,13)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
## plot for Proportion of maintained populations (indicator)
pb<- df %>%
  filter(n_extant_populations<500) %>%
  ggplot(aes(x=myaxis, y=indicator2, color=defined_populations_nicenames,    
                                     fill=defined_populations_nicenames)) +
          geom_boxplot() + xlab("") + ylab("Proportion of maintained populations within species") +
          geom_jitter(size=.4, width = 0.1, color="black") +
  coord_flip() +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="none",
        plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots) 
  scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_x_discrete(limits=rev) +
  theme(text = element_text(size = 13))
pb

Plot Scatter plot of indicator2 vs extant pops

psupA<- indicators_full %>%
  # filter outliers with too many pops and missing data
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator2)) %>%
  filter(!is.na(n_extant_populations)) %>%
  filter(species_range !="unknown") %>%
  
  # plot
    ggplot(aes(x=n_extant_populations, y=indicator2, color=defined_populations_nicenames)) +
    geom_point() +
    theme_light() +
    scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
    theme(legend.position = "none") +
    ylab("Proportion of maintained populations within species") +
    xlab("Number of maintained populations") +
    theme(text = element_text(size = 13))
psupA

psupA.1<- indicators_full %>%
  # filter outliers with too many pops and missing data
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator2)) %>%
  filter(!is.na(n_extant_populations)) %>%
  filter(species_range !="unknown") %>%
  
  # plot
    ggplot(aes(x=n_extant_populations, y=indicator2, color=species_range)) +
    geom_point() +
    theme_light() +
    theme(legend.position = "none") +
    ylab("Proportion of maintained populations within species") +
    xlab("Number of maintained populations") +
    theme(text = element_text(size = 13))
psupA.1

First we want to test if the Proportion of maintained populations (indicator 2) vary with method used.

Prepare data for model (remove outliers and NA in desired variable) and check n:

# remove missing data 
data_for_model<-indicators_full %>% 
                      filter(!is.na(indicator2)) %>%
                      filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots

# check n per method
table(data_for_model$defined_populations_simplified)
## 
##                         dispersal_buffer 
##                                       78 
##                       eco_biogeo_proxies 
##                                       32 
##                         genetic_clusters 
##                                       51 
##      genetic_clusters eco_biogeo_proxies 
##                                       18 
##   genetic_clusters geographic_boundaries 
##                                       41 
##                    geographic_boundaries 
##                                      176 
## geographic_boundaries eco_biogeo_proxies 
##                                       41 
##   geographic_boundaries management_units 
##                                       17 
##                         management_units 
##                                       23 
##                                    other 
##                                        9 
##                       other_combinations 
##                                       77
# total n
nrow(data_for_model)
## [1] 563
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
                                                       ref="geographic_boundaries")

Run model asking: Does Proportion of maintained populations (indicator 2) vary with method used? Controlling for variation in indicator2 among countries:

m.b1<-glmmTMB(indicator2 ~ defined_populations_simplified + (1|country_assessment), family = "ordbeta", data = data_for_model)

See results:

summary(m.b1)
##  Family: ordbeta  ( logit )
## Formula:          
## indicator2 ~ defined_populations_simplified + (1 | country_assessment)
## Data: data_for_model
## 
##      AIC      BIC   logLik deviance df.resid 
##    681.1    746.1   -325.5    651.1      548 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.3346   0.5784  
## Number of obs: 563, groups:  country_assessment, 9
## 
## Dispersion parameter for ordbeta family (): 4.01 
## 
## Conditional model:
##                                                                        Estimate
## (Intercept)                                                             0.58841
## defined_populations_simplifieddispersal_buffer                          0.25985
## defined_populations_simplifiedeco_biogeo_proxies                        0.07072
## defined_populations_simplifiedgenetic_clusters                          0.53440
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       0.84496
## defined_populations_simplifiedgenetic_clusters geographic_boundaries   -0.02910
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.08488
## defined_populations_simplifiedgeographic_boundaries management_units    0.34632
## defined_populations_simplifiedmanagement_units                         -0.17010
## defined_populations_simplifiedother                                     0.07577
## defined_populations_simplifiedother_combinations                        0.44042
##                                                                        Std. Error
## (Intercept)                                                               0.22695
## defined_populations_simplifieddispersal_buffer                            0.24779
## defined_populations_simplifiedeco_biogeo_proxies                          0.21201
## defined_populations_simplifiedgenetic_clusters                            0.25882
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies         0.44301
## defined_populations_simplifiedgenetic_clusters geographic_boundaries      0.21721
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    0.23326
## defined_populations_simplifiedgeographic_boundaries management_units      0.33767
## defined_populations_simplifiedmanagement_units                            0.24776
## defined_populations_simplifiedother                                       0.51341
## defined_populations_simplifiedother_combinations                          0.16779
##                                                                        z value
## (Intercept)                                                              2.593
## defined_populations_simplifieddispersal_buffer                           1.049
## defined_populations_simplifiedeco_biogeo_proxies                         0.334
## defined_populations_simplifiedgenetic_clusters                           2.065
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        1.907
## defined_populations_simplifiedgenetic_clusters geographic_boundaries    -0.134
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  -0.364
## defined_populations_simplifiedgeographic_boundaries management_units     1.026
## defined_populations_simplifiedmanagement_units                          -0.686
## defined_populations_simplifiedother                                      0.148
## defined_populations_simplifiedother_combinations                         2.625
##                                                                        Pr(>|z|)
## (Intercept)                                                             0.00952
## defined_populations_simplifieddispersal_buffer                          0.29432
## defined_populations_simplifiedeco_biogeo_proxies                        0.73869
## defined_populations_simplifiedgenetic_clusters                          0.03895
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       0.05648
## defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.89343
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  0.71593
## defined_populations_simplifiedgeographic_boundaries management_units    0.30506
## defined_populations_simplifiedmanagement_units                          0.49237
## defined_populations_simplifiedother                                     0.88267
## defined_populations_simplifiedother_combinations                        0.00867
##                                                                          
## (Intercept)                                                            **
## defined_populations_simplifieddispersal_buffer                           
## defined_populations_simplifiedeco_biogeo_proxies                         
## defined_populations_simplifiedgenetic_clusters                         * 
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      . 
## defined_populations_simplifiedgenetic_clusters geographic_boundaries     
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   
## defined_populations_simplifiedgeographic_boundaries management_units     
## defined_populations_simplifiedmanagement_units                           
## defined_populations_simplifiedother                                      
## defined_populations_simplifiedother_combinations                       **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Given the preceding relationships detected between method, number of populations, and species’ range, we investigated associations between these variables and our indicator values in more detail, to aid in understanding the underlying mechanisms that were driving the association between method (especially genetic clusters) and indicator 2. That is, we hypothesised that the relationship between method and indicator 2 may be an indirect result of the association between method and number of populations and species range.

First we added number of populations to our model testing the relationship between method and indicator 2

m.b2<-glmmTMB(indicator2 ~ defined_populations_simplified + n_extant_populations + (1|country_assessment), family = "ordbeta", data = data_for_model)

summary(m.b2)
##  Family: ordbeta  ( logit )
## Formula:          
## indicator2 ~ defined_populations_simplified + n_extant_populations +  
##     (1 | country_assessment)
## Data: data_for_model
## 
##      AIC      BIC   logLik deviance df.resid 
##    681.7    751.1   -324.9    649.7      547 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.3337   0.5776  
## Number of obs: 563, groups:  country_assessment, 9
## 
## Dispersion parameter for ordbeta family (): 4.07 
## 
## Conditional model:
##                                                                         Estimate
## (Intercept)                                                             0.567903
## defined_populations_simplifieddispersal_buffer                          0.270059
## defined_populations_simplifiedeco_biogeo_proxies                        0.062122
## defined_populations_simplifiedgenetic_clusters                          0.544212
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       0.873104
## defined_populations_simplifiedgenetic_clusters geographic_boundaries   -0.050636
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.088213
## defined_populations_simplifiedgeographic_boundaries management_units    0.373182
## defined_populations_simplifiedmanagement_units                         -0.150412
## defined_populations_simplifiedother                                     0.098218
## defined_populations_simplifiedother_combinations                        0.445949
## n_extant_populations                                                    0.001158
##                                                                        Std. Error
## (Intercept)                                                              0.227117
## defined_populations_simplifieddispersal_buffer                           0.246619
## defined_populations_simplifiedeco_biogeo_proxies                         0.210763
## defined_populations_simplifiedgenetic_clusters                           0.258211
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        0.442994
## defined_populations_simplifiedgenetic_clusters geographic_boundaries     0.217858
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   0.233333
## defined_populations_simplifiedgeographic_boundaries management_units     0.336928
## defined_populations_simplifiedmanagement_units                           0.247638
## defined_populations_simplifiedother                                      0.511800
## defined_populations_simplifiedother_combinations                         0.166517
## n_extant_populations                                                     0.001020
##                                                                        z value
## (Intercept)                                                              2.501
## defined_populations_simplifieddispersal_buffer                           1.095
## defined_populations_simplifiedeco_biogeo_proxies                         0.295
## defined_populations_simplifiedgenetic_clusters                           2.108
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        1.971
## defined_populations_simplifiedgenetic_clusters geographic_boundaries    -0.232
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  -0.378
## defined_populations_simplifiedgeographic_boundaries management_units     1.108
## defined_populations_simplifiedmanagement_units                          -0.607
## defined_populations_simplifiedother                                      0.192
## defined_populations_simplifiedother_combinations                         2.678
## n_extant_populations                                                     1.135
##                                                                        Pr(>|z|)
## (Intercept)                                                              0.0124
## defined_populations_simplifieddispersal_buffer                           0.2735
## defined_populations_simplifiedeco_biogeo_proxies                         0.7682
## defined_populations_simplifiedgenetic_clusters                           0.0351
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        0.0487
## defined_populations_simplifiedgenetic_clusters geographic_boundaries     0.8162
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   0.7054
## defined_populations_simplifiedgeographic_boundaries management_units     0.2680
## defined_populations_simplifiedmanagement_units                           0.5436
## defined_populations_simplifiedother                                      0.8478
## defined_populations_simplifiedother_combinations                         0.0074
## n_extant_populations                                                     0.2563
##                                                                          
## (Intercept)                                                            * 
## defined_populations_simplifieddispersal_buffer                           
## defined_populations_simplifiedeco_biogeo_proxies                         
## defined_populations_simplifiedgenetic_clusters                         * 
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      * 
## defined_populations_simplifiedgenetic_clusters geographic_boundaries     
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   
## defined_populations_simplifiedgeographic_boundaries management_units     
## defined_populations_simplifiedmanagement_units                           
## defined_populations_simplifiedother                                      
## defined_populations_simplifiedother_combinations                       **
## n_extant_populations                                                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Then we tested (see plot psupA) if there is a relationship between number of maintained populations and the PM indicator, overall, and/or with some methods?

Prepare data for model (remove outliers and NA in desired variable) and check n:

# remove missing data 
data_for_model<-indicators_full %>% 
                      filter(!is.na(indicator2)) %>%
                      filter(!is.na(n_extant_populations)) %>%
                      filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots

# check number of methods
length(unique(data_for_model$defined_populations_simplified))
## [1] 11
# check n per method
table(data_for_model$defined_populations_simplified)
## 
##                         dispersal_buffer 
##                                       78 
##                       eco_biogeo_proxies 
##                                       32 
##                         genetic_clusters 
##                                       51 
##      genetic_clusters eco_biogeo_proxies 
##                                       18 
##   genetic_clusters geographic_boundaries 
##                                       41 
##                    geographic_boundaries 
##                                      176 
## geographic_boundaries eco_biogeo_proxies 
##                                       41 
##   geographic_boundaries management_units 
##                                       17 
##                         management_units 
##                                       23 
##                                    other 
##                                        9 
##                       other_combinations 
##                                       77
# total n
nrow(data_for_model)
## [1] 563
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
                                                       ref="geographic_boundaries")

We tested for a relationship between number of populations alone with indicator 2 in our dataset (i.e. when not controlling for method).

Does number of populations alone affect indicator2 (i.e. not controlling for method)?:

msupA1 <- glmmTMB(indicator2 ~ n_extant_populations + (1|country_assessment), family = "ordbeta", data= data_for_model)

Summary:

summary(msupA1)
##  Family: ordbeta  ( logit )
## Formula:          indicator2 ~ n_extant_populations + (1 | country_assessment)
## Data: data_for_model
## 
##      AIC      BIC   logLik deviance df.resid 
##    679.6    705.6   -333.8    667.6      557 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.3213   0.5669  
## Number of obs: 563, groups:  country_assessment, 9
## 
## Dispersion parameter for ordbeta family (): 4.01 
## 
## Conditional model:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          0.7078135  0.2053016   3.448 0.000565 ***
## n_extant_populations 0.0007304  0.0010130   0.721 0.470923    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

But, there were statistically significant interactions between number of populations and some of the methods used, on indicator 2.

Does the effect of method on indicator2 depend on number of maintained pops?

# run model
msupA2 <- glmmTMB(indicator2 ~ defined_populations_simplified + n_extant_populations + defined_populations_simplified*n_extant_populations + (1|country_assessment), family = "ordbeta", data = data_for_model)
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, : NA/
## NaN function evaluation

Summary:

summary(msupA2)
##  Family: ordbeta  ( logit )
## Formula:          
## indicator2 ~ defined_populations_simplified + n_extant_populations +  
##     defined_populations_simplified * n_extant_populations + (1 |  
##     country_assessment)
## Data: data_for_model
## 
##      AIC      BIC   logLik deviance df.resid 
##    667.8    780.5   -307.9    615.8      537 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.3125   0.5591  
## Number of obs: 563, groups:  country_assessment, 9
## 
## Dispersion parameter for ordbeta family (): 4.64 
## 
## Conditional model:
##                                                                                               Estimate
## (Intercept)                                                                                  0.4925006
## defined_populations_simplifieddispersal_buffer                                               0.2460489
## defined_populations_simplifiedeco_biogeo_proxies                                             0.0188120
## defined_populations_simplifiedgenetic_clusters                                               0.5377349
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                            2.0994080
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                         0.1492989
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                       0.1119634
## defined_populations_simplifiedgeographic_boundaries management_units                         0.1085245
## defined_populations_simplifiedmanagement_units                                               0.3590284
## defined_populations_simplifiedother                                                         -1.4970103
## defined_populations_simplifiedother_combinations                                             0.3071256
## n_extant_populations                                                                         0.0032484
## defined_populations_simplifieddispersal_buffer:n_extant_populations                          0.0054877
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations                       -0.0003051
## defined_populations_simplifiedgenetic_clusters:n_extant_populations                          0.0143372
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations      -0.1030565
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations   -0.0053672
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations -0.0056177
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations    0.0484225
## defined_populations_simplifiedmanagement_units:n_extant_populations                         -0.0309641
## defined_populations_simplifiedother:n_extant_populations                                     0.4660849
## defined_populations_simplifiedother_combinations:n_extant_populations                        0.0074657
##                                                                                             Std. Error
## (Intercept)                                                                                  0.2237159
## defined_populations_simplifieddispersal_buffer                                               0.2488836
## defined_populations_simplifiedeco_biogeo_proxies                                             0.2386934
## defined_populations_simplifiedgenetic_clusters                                               0.3856654
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                            0.6606466
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                         0.2328456
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                       0.2574985
## defined_populations_simplifiedgeographic_boundaries management_units                         0.4294699
## defined_populations_simplifiedmanagement_units                                               0.3349141
## defined_populations_simplifiedother                                                          0.9685209
## defined_populations_simplifiedother_combinations                                             0.1918873
## n_extant_populations                                                                         0.0016599
## defined_populations_simplifieddispersal_buffer:n_extant_populations                          0.0073093
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations                        0.0030605
## defined_populations_simplifiedgenetic_clusters:n_extant_populations                          0.0617807
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations       0.0330792
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations    0.0025023
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations  0.0028528
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations    0.0531971
## defined_populations_simplifiedmanagement_units:n_extant_populations                          0.0149745
## defined_populations_simplifiedother:n_extant_populations                                     0.2848729
## defined_populations_simplifiedother_combinations:n_extant_populations                        0.0050503
##                                                                                             z value
## (Intercept)                                                                                   2.201
## defined_populations_simplifieddispersal_buffer                                                0.989
## defined_populations_simplifiedeco_biogeo_proxies                                              0.079
## defined_populations_simplifiedgenetic_clusters                                                1.394
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                             3.178
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                          0.641
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                        0.435
## defined_populations_simplifiedgeographic_boundaries management_units                          0.253
## defined_populations_simplifiedmanagement_units                                                1.072
## defined_populations_simplifiedother                                                          -1.546
## defined_populations_simplifiedother_combinations                                              1.601
## n_extant_populations                                                                          1.957
## defined_populations_simplifieddispersal_buffer:n_extant_populations                           0.751
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations                        -0.100
## defined_populations_simplifiedgenetic_clusters:n_extant_populations                           0.232
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations       -3.115
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations    -2.145
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations  -1.969
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations     0.910
## defined_populations_simplifiedmanagement_units:n_extant_populations                          -2.068
## defined_populations_simplifiedother:n_extant_populations                                      1.636
## defined_populations_simplifiedother_combinations:n_extant_populations                         1.478
##                                                                                             Pr(>|z|)
## (Intercept)                                                                                  0.02770
## defined_populations_simplifieddispersal_buffer                                               0.32285
## defined_populations_simplifiedeco_biogeo_proxies                                             0.93718
## defined_populations_simplifiedgenetic_clusters                                               0.16323
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                            0.00148
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                         0.52140
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                       0.66370
## defined_populations_simplifiedgeographic_boundaries management_units                         0.80050
## defined_populations_simplifiedmanagement_units                                               0.28372
## defined_populations_simplifiedother                                                          0.12219
## defined_populations_simplifiedother_combinations                                             0.10948
## n_extant_populations                                                                         0.05035
## defined_populations_simplifieddispersal_buffer:n_extant_populations                          0.45278
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations                        0.92060
## defined_populations_simplifiedgenetic_clusters:n_extant_populations                          0.81649
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations       0.00184
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations    0.03196
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations  0.04893
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations    0.36269
## defined_populations_simplifiedmanagement_units:n_extant_populations                          0.03866
## defined_populations_simplifiedother:n_extant_populations                                     0.10182
## defined_populations_simplifiedother_combinations:n_extant_populations                        0.13934
##                                                                                               
## (Intercept)                                                                                 * 
## defined_populations_simplifieddispersal_buffer                                                
## defined_populations_simplifiedeco_biogeo_proxies                                              
## defined_populations_simplifiedgenetic_clusters                                                
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                           **
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                          
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                        
## defined_populations_simplifiedgeographic_boundaries management_units                          
## defined_populations_simplifiedmanagement_units                                                
## defined_populations_simplifiedother                                                           
## defined_populations_simplifiedother_combinations                                              
## n_extant_populations                                                                        . 
## defined_populations_simplifieddispersal_buffer:n_extant_populations                           
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations                         
## defined_populations_simplifiedgenetic_clusters:n_extant_populations                           
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations      **
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations   * 
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations * 
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations     
## defined_populations_simplifiedmanagement_units:n_extant_populations                         * 
## defined_populations_simplifiedother:n_extant_populations                                      
## defined_populations_simplifiedother_combinations:n_extant_populations                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Because the method used to define a population appears to be important for these relationships, we conducted an additional analysis to simplify our analysis to only those species for which a single method was used to determine population clusters, and repeated the model presented above (evaluating a possible interaction between method and number of populations on indicator 2).

First, subset the data to only those taxa where a single method was used:

ind2_single_methods<-indicators_full %>% 
                      filter(!is.na(indicator2)) %>% 
                      filter(n_extant_populations<500) %>%  # doesn't make a difference in the test below, but useful for 
                      filter(defined_populations_simplified=="genetic_clusters" | 
                             defined_populations_simplified=="geographic_boundaries" |
                             defined_populations_simplified=="eco_biogeo_proxies" | 
                             defined_populations_simplified=="management_units" |
                             defined_populations_simplified=="dispersal_buffer")


# check number of methods
length(unique(ind2_single_methods$defined_populations_simplified))
## [1] 5
# check n by method
table(ind2_single_methods$defined_populations_simplified)
## 
##      dispersal_buffer    eco_biogeo_proxies      genetic_clusters 
##                    78                    32                    51 
## geographic_boundaries      management_units 
##                   176                    23
# check n total
nrow(ind2_single_methods)
## [1] 360
# re-level to use geographic boundaries as reference category for the analysis
ind2_single_methods$defined_populations_simplified<-relevel(as.factor(ind2_single_methods$defined_populations_simplified),
                                                       ref="geographic_boundaries")

Does the effect of “single” method on indicator2 depend on number of maintained pops?

msupA3<-glmmTMB(indicator2 ~ n_extant_populations + defined_populations_simplified +        n_extant_populations*defined_populations_simplified + (1|country_assessment), family = "ordbeta", data = ind2_single_methods)

# summary
summary(msupA3)
##  Family: ordbeta  ( logit )
## Formula:          
## indicator2 ~ n_extant_populations + defined_populations_simplified +  
##     n_extant_populations * defined_populations_simplified + (1 |  
##     country_assessment)
## Data: ind2_single_methods
## 
##      AIC      BIC   logLik deviance df.resid 
##    449.4    503.8   -210.7    421.4      346 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.2512   0.5012  
## Number of obs: 360, groups:  country_assessment, 8
## 
## Dispersion parameter for ordbeta family (): 4.29 
## 
## Conditional model:
##                                                                         Estimate
## (Intercept)                                                            0.4640580
## n_extant_populations                                                   0.0022418
## defined_populations_simplifieddispersal_buffer                         0.2080042
## defined_populations_simplifiedeco_biogeo_proxies                      -0.0533983
## defined_populations_simplifiedgenetic_clusters                         0.7560365
## defined_populations_simplifiedmanagement_units                         0.4187629
## n_extant_populations:defined_populations_simplifieddispersal_buffer    0.0060574
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies  0.0002143
## n_extant_populations:defined_populations_simplifiedgenetic_clusters   -0.0009572
## n_extant_populations:defined_populations_simplifiedmanagement_units   -0.0345958
##                                                                       Std. Error
## (Intercept)                                                            0.2262534
## n_extant_populations                                                   0.0016874
## defined_populations_simplifieddispersal_buffer                         0.2911548
## defined_populations_simplifiedeco_biogeo_proxies                       0.2434107
## defined_populations_simplifiedgenetic_clusters                         0.3879537
## defined_populations_simplifiedmanagement_units                         0.3409384
## n_extant_populations:defined_populations_simplifieddispersal_buffer    0.0073889
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies  0.0030383
## n_extant_populations:defined_populations_simplifiedgenetic_clusters    0.0624312
## n_extant_populations:defined_populations_simplifiedmanagement_units    0.0158856
##                                                                       z value
## (Intercept)                                                             2.051
## n_extant_populations                                                    1.329
## defined_populations_simplifieddispersal_buffer                          0.714
## defined_populations_simplifiedeco_biogeo_proxies                       -0.219
## defined_populations_simplifiedgenetic_clusters                          1.949
## defined_populations_simplifiedmanagement_units                          1.228
## n_extant_populations:defined_populations_simplifieddispersal_buffer     0.820
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies   0.070
## n_extant_populations:defined_populations_simplifiedgenetic_clusters    -0.015
## n_extant_populations:defined_populations_simplifiedmanagement_units    -2.178
##                                                                       Pr(>|z|)
## (Intercept)                                                             0.0403
## n_extant_populations                                                    0.1840
## defined_populations_simplifieddispersal_buffer                          0.4750
## defined_populations_simplifiedeco_biogeo_proxies                        0.8264
## defined_populations_simplifiedgenetic_clusters                          0.0513
## defined_populations_simplifiedmanagement_units                          0.2193
## n_extant_populations:defined_populations_simplifieddispersal_buffer     0.4123
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies   0.9438
## n_extant_populations:defined_populations_simplifiedgenetic_clusters     0.9878
## n_extant_populations:defined_populations_simplifiedmanagement_units     0.0294
##                                                                        
## (Intercept)                                                           *
## n_extant_populations                                                   
## defined_populations_simplifieddispersal_buffer                         
## defined_populations_simplifiedeco_biogeo_proxies                       
## defined_populations_simplifiedgenetic_clusters                        .
## defined_populations_simplifiedmanagement_units                         
## n_extant_populations:defined_populations_simplifieddispersal_buffer    
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies  
## n_extant_populations:defined_populations_simplifiedgenetic_clusters    
## n_extant_populations:defined_populations_simplifiedmanagement_units   *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Because we found a relationship between method and number of populations on indicator PM, and a relationship between species range and number of populations, we further tested whether the effect of method on indicator PM is moderated by species range.

First filter data to consider only wide ranging and restricted categories (ie remove unknown due to small sampling size)

## Remove unknown
data<- indicators_averaged_one  %>%
                    filter(!is.na(indicator2_mean)) %>% 
                    filter(species_range !="unknown")

# summary of indicator
summary(data$indicator2_mean)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.6667  1.0000  0.8264  1.0000  1.0000
# re-level to use geographic boundaries as reference category for the analysis
data$defined_populations_simplified<-relevel(as.factor(data$defined_populations_simplified),
                                                       ref="geographic_boundaries")

# make sure species range is a factor
data$species_range<-as.factor(data$species_range)  

Run model: Does method still impact indicator2 if we control for species range?

## + country
m.b3 <- glmmTMB(indicator2_mean ~ defined_populations_simplified + species_range + (1|country_assessment), family = "ordbeta", data = data)


# summary results
summary(m.b3)
##  Family: ordbeta  ( logit )
## Formula:          
## indicator2_mean ~ defined_populations_simplified + species_range +  
##     (1 | country_assessment)
## Data: data
## 
##      AIC      BIC   logLik deviance df.resid 
##    604.5    672.1   -286.3    572.5      488 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.3345   0.5784  
## Number of obs: 504, groups:  country_assessment, 9
## 
## Dispersion parameter for ordbeta family (): 4.04 
## 
## Conditional model:
##                                                                        Estimate
## (Intercept)                                                             0.60804
## defined_populations_simplifieddispersal_buffer                          0.08659
## defined_populations_simplifiedeco_biogeo_proxies                       -0.09268
## defined_populations_simplifiedgenetic_clusters                          0.24434
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       0.17471
## defined_populations_simplifiedgenetic_clusters geographic_boundaries   -0.17172
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.29551
## defined_populations_simplifiedgeographic_boundaries management_units    0.25322
## defined_populations_simplifiedmanagement_units                         -0.40567
## defined_populations_simplifiedother                                    -0.11961
## defined_populations_simplifiedother_combinations                        0.25855
## species_rangewide ranging                                               0.37873
##                                                                        Std. Error
## (Intercept)                                                               0.23038
## defined_populations_simplifieddispersal_buffer                            0.25664
## defined_populations_simplifiedeco_biogeo_proxies                          0.24320
## defined_populations_simplifiedgenetic_clusters                            0.26630
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies         0.52553
## defined_populations_simplifiedgenetic_clusters geographic_boundaries      0.22064
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    0.25083
## defined_populations_simplifiedgeographic_boundaries management_units      0.33743
## defined_populations_simplifiedmanagement_units                            0.29794
## defined_populations_simplifiedother                                       0.51439
## defined_populations_simplifiedother_combinations                          0.17706
## species_rangewide ranging                                                 0.11803
##                                                                        z value
## (Intercept)                                                              2.639
## defined_populations_simplifieddispersal_buffer                           0.337
## defined_populations_simplifiedeco_biogeo_proxies                        -0.381
## defined_populations_simplifiedgenetic_clusters                           0.918
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        0.332
## defined_populations_simplifiedgenetic_clusters geographic_boundaries    -0.778
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  -1.178
## defined_populations_simplifiedgeographic_boundaries management_units     0.750
## defined_populations_simplifiedmanagement_units                          -1.362
## defined_populations_simplifiedother                                     -0.233
## defined_populations_simplifiedother_combinations                         1.460
## species_rangewide ranging                                                3.209
##                                                                        Pr(>|z|)
## (Intercept)                                                             0.00831
## defined_populations_simplifieddispersal_buffer                          0.73581
## defined_populations_simplifiedeco_biogeo_proxies                        0.70315
## defined_populations_simplifiedgenetic_clusters                          0.35885
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       0.73955
## defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.43640
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  0.23876
## defined_populations_simplifiedgeographic_boundaries management_units    0.45299
## defined_populations_simplifiedmanagement_units                          0.17333
## defined_populations_simplifiedother                                     0.81613
## defined_populations_simplifiedother_combinations                        0.14422
## species_rangewide ranging                                               0.00133
##                                                                          
## (Intercept)                                                            **
## defined_populations_simplifieddispersal_buffer                           
## defined_populations_simplifiedeco_biogeo_proxies                         
## defined_populations_simplifiedgenetic_clusters                           
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        
## defined_populations_simplifiedgenetic_clusters geographic_boundaries     
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   
## defined_populations_simplifiedgeographic_boundaries management_units     
## defined_populations_simplifiedmanagement_units                           
## defined_populations_simplifiedother                                      
## defined_populations_simplifiedother_combinations                         
## species_rangewide ranging                                              **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Similarly to the effect of number of populations on indicator 2, we further tested whether there was an interaction between method and species range, i.e. to determine whether species range was only associated with indicator 2 for some methods.

## run model 
m.b4 <- glmmTMB(indicator2_mean ~ defined_populations_simplified + species_range + defined_populations_simplified*species_range + (1|country_assessment), family = "ordbeta", data = data)


# summary results
summary(m.b4)
##  Family: ordbeta  ( logit )
## Formula:          
## indicator2_mean ~ defined_populations_simplified + species_range +  
##     defined_populations_simplified * species_range + (1 | country_assessment)
## Data: data
## 
##      AIC      BIC   logLik deviance df.resid 
##    610.0    719.8   -279.0    558.0      478 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.3401   0.5831  
## Number of obs: 504, groups:  country_assessment, 9
## 
## Dispersion parameter for ordbeta family (): 4.23 
## 
## Conditional model:
##                                                                                                    Estimate
## (Intercept)                                                                                       5.606e-01
## defined_populations_simplifieddispersal_buffer                                                    2.209e-01
## defined_populations_simplifiedeco_biogeo_proxies                                                 -1.726e-01
## defined_populations_simplifiedgenetic_clusters                                                    1.575e-01
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                -2.853e-01
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                             -2.155e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                           -4.549e-02
## defined_populations_simplifiedgeographic_boundaries management_units                              6.932e-02
## defined_populations_simplifiedmanagement_units                                                    2.216e-02
## defined_populations_simplifiedother                                                              -3.876e-01
## defined_populations_simplifiedother_combinations                                                  2.774e-01
## species_rangewide ranging                                                                         5.126e-01
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging                         -3.300e-01
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging                        4.776e-02
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging                          6.842e-02
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging       1.977e+01
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging    5.824e-02
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging -7.361e-01
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging    6.680e-01
## defined_populations_simplifiedmanagement_units:species_rangewide ranging                         -9.713e-01
## defined_populations_simplifiedother:species_rangewide ranging                                     1.859e+01
## defined_populations_simplifiedother_combinations:species_rangewide ranging                       -1.222e-01
##                                                                                                  Std. Error
## (Intercept)                                                                                       2.342e-01
## defined_populations_simplifieddispersal_buffer                                                    2.883e-01
## defined_populations_simplifiedeco_biogeo_proxies                                                  3.146e-01
## defined_populations_simplifiedgenetic_clusters                                                    4.044e-01
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                 5.450e-01
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                              2.648e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                            2.938e-01
## defined_populations_simplifiedgeographic_boundaries management_units                              3.766e-01
## defined_populations_simplifiedmanagement_units                                                    4.014e-01
## defined_populations_simplifiedother                                                               5.286e-01
## defined_populations_simplifiedother_combinations                                                  2.308e-01
## species_rangewide ranging                                                                         2.258e-01
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging                          3.497e-01
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging                        4.812e-01
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging                          5.398e-01
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging       1.042e+04
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging    4.622e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging  4.543e-01
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging    8.601e-01
## defined_populations_simplifiedmanagement_units:species_rangewide ranging                          5.937e-01
## defined_populations_simplifiedother:species_rangewide ranging                                     7.096e+03
## defined_populations_simplifiedother_combinations:species_rangewide ranging                        3.750e-01
##                                                                                                  z value
## (Intercept)                                                                                        2.394
## defined_populations_simplifieddispersal_buffer                                                     0.766
## defined_populations_simplifiedeco_biogeo_proxies                                                  -0.549
## defined_populations_simplifiedgenetic_clusters                                                     0.390
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                 -0.523
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                              -0.814
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                            -0.155
## defined_populations_simplifiedgeographic_boundaries management_units                               0.184
## defined_populations_simplifiedmanagement_units                                                     0.055
## defined_populations_simplifiedother                                                               -0.733
## defined_populations_simplifiedother_combinations                                                   1.202
## species_rangewide ranging                                                                          2.270
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging                          -0.944
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging                         0.099
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging                           0.127
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging        0.002
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging     0.126
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging  -1.620
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging     0.777
## defined_populations_simplifiedmanagement_units:species_rangewide ranging                          -1.636
## defined_populations_simplifiedother:species_rangewide ranging                                      0.003
## defined_populations_simplifiedother_combinations:species_rangewide ranging                        -0.326
##                                                                                                  Pr(>|z|)
## (Intercept)                                                                                        0.0167
## defined_populations_simplifieddispersal_buffer                                                     0.4434
## defined_populations_simplifiedeco_biogeo_proxies                                                   0.5833
## defined_populations_simplifiedgenetic_clusters                                                     0.6969
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                  0.6006
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                               0.4159
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                             0.8770
## defined_populations_simplifiedgeographic_boundaries management_units                               0.8540
## defined_populations_simplifiedmanagement_units                                                     0.9560
## defined_populations_simplifiedother                                                                0.4634
## defined_populations_simplifiedother_combinations                                                   0.2293
## species_rangewide ranging                                                                          0.0232
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging                           0.3454
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging                         0.9209
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging                           0.8991
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging        0.9985
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging     0.8997
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging   0.1052
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging     0.4374
## defined_populations_simplifiedmanagement_units:species_rangewide ranging                           0.1018
## defined_populations_simplifiedother:species_rangewide ranging                                      0.9979
## defined_populations_simplifiedother_combinations:species_rangewide ranging                         0.7445
##                                                                                                   
## (Intercept)                                                                                      *
## defined_populations_simplifieddispersal_buffer                                                    
## defined_populations_simplifiedeco_biogeo_proxies                                                  
## defined_populations_simplifiedgenetic_clusters                                                    
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                 
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                              
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                            
## defined_populations_simplifiedgeographic_boundaries management_units                              
## defined_populations_simplifiedmanagement_units                                                    
## defined_populations_simplifiedother                                                               
## defined_populations_simplifiedother_combinations                                                  
## species_rangewide ranging                                                                        *
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging                          
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging                        
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging                          
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging       
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging    
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging  
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging    
## defined_populations_simplifiedmanagement_units:species_rangewide ranging                          
## defined_populations_simplifiedother:species_rangewide ranging                                     
## defined_populations_simplifiedother_combinations:species_rangewide ranging                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(c) Proportion of populations with Ne>500 (indicator1)

Our analysis of Ne indicator followed a parallel structure to our analysis of PM indicator.

Plot Ne indicator by method to define pops.

# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
                    filter(!is.na(indicator1)) %>% 
                    filter(n_extant_populations<500) %>% 
                    group_by(defined_populations_nicenames) %>% summarize(num=n())

# custom axis
## new dataframe
df<-indicators_full %>% 
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator1)) %>% 
    # add sampling size 
  left_join(sample_size) %>%
  mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
  mutate(myaxis = factor(myaxis, 
                  levels=levels(as.factor(myaxis))[c(1,12,2:11,13)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
## plot 
pc<- df %>%
  ggplot(aes(x=myaxis, y=indicator1, color=defined_populations_nicenames,    
                                     fill=defined_populations_nicenames)) +
          geom_boxplot() + xlab("") + ylab("Proportion of populations within species with Ne>500") +
          geom_jitter(size=.4, width = 0.1, color="black") +
  coord_flip() +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="none",
        plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots) 
  scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_x_discrete(limits=rev) +
  theme(text = element_text(size = 13))
pc

Scatter plot of indicator1 vs extant pops

psupB<- indicators_full %>%
  # filter outliers with too many pops and missing data
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator1)) %>%
  filter(!is.na(n_extant_populations)) %>%
  filter(species_range !="unknown") %>%
  
  
  # plot
    ggplot(aes(x=n_extant_populations, y=indicator1, color=defined_populations_nicenames)) +
    geom_point() +
    theme_light() +
    scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
    theme(legend.position = "none") +
    ylab("Proportion of populations within species with Ne>500") +
    xlab("Number of maintained populations") +
    theme(text = element_text(size = 13))
psupB

## Coloring by range 
psupB.1<- indicators_full %>%
  # filter outliers with too many pops and missing data
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator1)) %>%
  filter(!is.na(n_extant_populations)) %>%
  filter(species_range !="unknown") %>%
  
  
  # plot
    ggplot(aes(x=n_extant_populations, y=indicator1, color=species_range)) +
    geom_point() +
    theme_light() +
    theme(legend.position = "none") +
    ylab("Proportion of populations within species with Ne>500") +
    xlab("Number of maintained populations") +
    theme(text = element_text(size = 13))
psupB.1

First we tested whether method used was associated with variation in indicator (figure c)

Prepare data for model (remove outliers and NA in desired variable) and check n:

# remove missing data 
data_for_model<-indicators_full %>% 
                      filter(!is.na(indicator1)) %>%
                      filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots

# check n per method
table(data_for_model$defined_populations_simplified)
## 
##                         dispersal_buffer 
##                                      138 
##                       eco_biogeo_proxies 
##                                       18 
##                         genetic_clusters 
##                                       58 
##      genetic_clusters eco_biogeo_proxies 
##                                        8 
##   genetic_clusters geographic_boundaries 
##                                       41 
##                    geographic_boundaries 
##                                      159 
## geographic_boundaries eco_biogeo_proxies 
##                                       56 
##   geographic_boundaries management_units 
##                                       20 
##                         management_units 
##                                       13 
##                                    other 
##                                        6 
##                       other_combinations 
##                                       68
# total n
nrow(data_for_model)
## [1] 585
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
                                                       ref="geographic_boundaries")

Run model asking: Does Ne indicator vary with method used? Controlling for variation in indicator among countries:

m.c1<-glmmTMB(indicator1 ~ defined_populations_simplified + (1|country_assessment), family = "ordbeta", data = data_for_model)

See results:

summary(m.c1)
##  Family: ordbeta  ( logit )
## Formula:          
## indicator1 ~ defined_populations_simplified + (1 | country_assessment)
## Data: data_for_model
## 
##      AIC      BIC   logLik deviance df.resid 
##   1082.2   1147.8   -526.1   1052.2      570 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.08216  0.2866  
## Number of obs: 585, groups:  country_assessment, 9
## 
## Dispersion parameter for ordbeta family (): 3.88 
## 
## Conditional model:
##                                                                        Estimate
## (Intercept)                                                            -0.87277
## defined_populations_simplifieddispersal_buffer                          0.29210
## defined_populations_simplifiedeco_biogeo_proxies                       -0.17477
## defined_populations_simplifiedgenetic_clusters                          0.51445
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       1.03077
## defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.52363
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.09502
## defined_populations_simplifiedgeographic_boundaries management_units    0.62861
## defined_populations_simplifiedmanagement_units                         -0.04810
## defined_populations_simplifiedother                                     1.05279
## defined_populations_simplifiedother_combinations                        0.35747
##                                                                        Std. Error
## (Intercept)                                                               0.17691
## defined_populations_simplifieddispersal_buffer                            0.30559
## defined_populations_simplifiedeco_biogeo_proxies                          0.31388
## defined_populations_simplifiedgenetic_clusters                            0.23409
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies         0.43867
## defined_populations_simplifiedgenetic_clusters geographic_boundaries      0.24584
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    0.33583
## defined_populations_simplifiedgeographic_boundaries management_units      0.33280
## defined_populations_simplifiedmanagement_units                            0.41720
## defined_populations_simplifiedother                                       0.62077
## defined_populations_simplifiedother_combinations                          0.20335
##                                                                        z value
## (Intercept)                                                             -4.934
## defined_populations_simplifieddispersal_buffer                           0.956
## defined_populations_simplifiedeco_biogeo_proxies                        -0.557
## defined_populations_simplifiedgenetic_clusters                           2.198
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        2.350
## defined_populations_simplifiedgenetic_clusters geographic_boundaries     2.130
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  -0.283
## defined_populations_simplifiedgeographic_boundaries management_units     1.889
## defined_populations_simplifiedmanagement_units                          -0.115
## defined_populations_simplifiedother                                      1.696
## defined_populations_simplifiedother_combinations                         1.758
##                                                                        Pr(>|z|)
## (Intercept)                                                            8.07e-07
## defined_populations_simplifieddispersal_buffer                           0.3391
## defined_populations_simplifiedeco_biogeo_proxies                         0.5777
## defined_populations_simplifiedgenetic_clusters                           0.0280
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        0.0188
## defined_populations_simplifiedgenetic_clusters geographic_boundaries     0.0332
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   0.7772
## defined_populations_simplifiedgeographic_boundaries management_units     0.0589
## defined_populations_simplifiedmanagement_units                           0.9082
## defined_populations_simplifiedother                                      0.0899
## defined_populations_simplifiedother_combinations                         0.0788
##                                                                           
## (Intercept)                                                            ***
## defined_populations_simplifieddispersal_buffer                            
## defined_populations_simplifiedeco_biogeo_proxies                          
## defined_populations_simplifiedgenetic_clusters                         *  
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      *  
## defined_populations_simplifiedgenetic_clusters geographic_boundaries   *  
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    
## defined_populations_simplifiedgeographic_boundaries management_units   .  
## defined_populations_simplifiedmanagement_units                            
## defined_populations_simplifiedother                                    .  
## defined_populations_simplifiedother_combinations                       .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

we next investigated whether the relationships between methods and the indicator were moderated by the role of number of populations and species range. Ie:

Does method still influence indicator1 if we control for number of populations?

m.c2 <- glmmTMB(indicator1 ~ defined_populations_simplified + n_extant_populations + (1|country_assessment), family = "ordbeta", data = data_for_model)

summary(m.c2)
##  Family: ordbeta  ( logit )
## Formula:          
## indicator1 ~ defined_populations_simplified + n_extant_populations +  
##     (1 | country_assessment)
## Data: data_for_model
## 
##      AIC      BIC   logLik deviance df.resid 
##   1076.2   1146.2   -522.1   1044.2      569 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.05326  0.2308  
## Number of obs: 585, groups:  country_assessment, 9
## 
## Dispersion parameter for ordbeta family (): 4.28 
## 
## Conditional model:
##                                                                         Estimate
## (Intercept)                                                            -0.747106
## defined_populations_simplifieddispersal_buffer                          0.168023
## defined_populations_simplifiedeco_biogeo_proxies                       -0.130620
## defined_populations_simplifiedgenetic_clusters                          0.434933
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       0.979502
## defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.471630
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.124338
## defined_populations_simplifiedgeographic_boundaries management_units    0.616210
## defined_populations_simplifiedmanagement_units                         -0.065216
## defined_populations_simplifiedother                                     0.961420
## defined_populations_simplifiedother_combinations                        0.356210
## n_extant_populations                                                   -0.004787
##                                                                        Std. Error
## (Intercept)                                                              0.167921
## defined_populations_simplifieddispersal_buffer                           0.284721
## defined_populations_simplifiedeco_biogeo_proxies                         0.306985
## defined_populations_simplifiedgenetic_clusters                           0.229850
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        0.425920
## defined_populations_simplifiedgenetic_clusters geographic_boundaries     0.241430
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   0.320972
## defined_populations_simplifiedgeographic_boundaries management_units     0.325202
## defined_populations_simplifiedmanagement_units                           0.405594
## defined_populations_simplifiedother                                      0.613489
## defined_populations_simplifiedother_combinations                         0.196025
## n_extant_populations                                                     0.001795
##                                                                        z value
## (Intercept)                                                             -4.449
## defined_populations_simplifieddispersal_buffer                           0.590
## defined_populations_simplifiedeco_biogeo_proxies                        -0.425
## defined_populations_simplifiedgenetic_clusters                           1.892
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        2.300
## defined_populations_simplifiedgenetic_clusters geographic_boundaries     1.953
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  -0.387
## defined_populations_simplifiedgeographic_boundaries management_units     1.895
## defined_populations_simplifiedmanagement_units                          -0.161
## defined_populations_simplifiedother                                      1.567
## defined_populations_simplifiedother_combinations                         1.817
## n_extant_populations                                                    -2.667
##                                                                        Pr(>|z|)
## (Intercept)                                                            8.62e-06
## defined_populations_simplifieddispersal_buffer                          0.55510
## defined_populations_simplifiedeco_biogeo_proxies                        0.67048
## defined_populations_simplifiedgenetic_clusters                          0.05846
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       0.02146
## defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.05076
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  0.69848
## defined_populations_simplifiedgeographic_boundaries management_units    0.05811
## defined_populations_simplifiedmanagement_units                          0.87226
## defined_populations_simplifiedother                                     0.11708
## defined_populations_simplifiedother_combinations                        0.06919
## n_extant_populations                                                    0.00765
##                                                                           
## (Intercept)                                                            ***
## defined_populations_simplifieddispersal_buffer                            
## defined_populations_simplifiedeco_biogeo_proxies                          
## defined_populations_simplifiedgenetic_clusters                         .  
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      *  
## defined_populations_simplifiedgenetic_clusters geographic_boundaries   .  
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    
## defined_populations_simplifiedgeographic_boundaries management_units   .  
## defined_populations_simplifiedmanagement_units                            
## defined_populations_simplifiedother                                       
## defined_populations_simplifiedother_combinations                       .  
## n_extant_populations                                                   ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We then tested if there a relationship between number of maintained populations and indicator1, overall, and/or with some methods? (model associated to plot psupB)

Prepare data for model (remove outliers and NA in desired variable) and check n:

# remove missing data 
data_for_model<-indicators_full %>% 
                      filter(!is.na(indicator1)) %>%
                      filter(!is.na(n_extant_populations)) %>%
                      filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots

# check number of methods
length(unique(data_for_model$defined_populations_simplified))
## [1] 11
# check n per method
table(data_for_model$defined_populations_simplified)
## 
##                         dispersal_buffer 
##                                      138 
##                       eco_biogeo_proxies 
##                                       18 
##                         genetic_clusters 
##                                       58 
##      genetic_clusters eco_biogeo_proxies 
##                                        8 
##   genetic_clusters geographic_boundaries 
##                                       41 
##                    geographic_boundaries 
##                                      159 
## geographic_boundaries eco_biogeo_proxies 
##                                       56 
##   geographic_boundaries management_units 
##                                       20 
##                         management_units 
##                                       13 
##                                    other 
##                                        6 
##                       other_combinations 
##                                       68
# total n
nrow(data_for_model)
## [1] 585
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
                                                       ref="geographic_boundaries")

Does the number of maintained pops alone affect the Ne indicator? (i.e. not controlling for method)

msupB1<-glmmTMB(indicator1 ~ n_extant_populations + (1|country_assessment), family = "ordbeta", data= data_for_model)

Summary:

summary(msupB1)
##  Family: ordbeta  ( logit )
## Formula:          indicator1 ~ n_extant_populations + (1 | country_assessment)
## Data: data_for_model
## 
##      AIC      BIC   logLik deviance df.resid 
##   1073.1   1099.4   -530.6   1061.1      579 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.05368  0.2317  
## Number of obs: 585, groups:  country_assessment, 9
## 
## Dispersion parameter for ordbeta family (): 4.08 
## 
## Conditional model:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.523723   0.119543  -4.381 1.18e-05 ***
## n_extant_populations -0.005059   0.001786  -2.832  0.00462 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Does the effect of method depend on the number of populations? Or put another way, does the importance of number of populations also depend on method?

# run model
msupB2 <- glmmTMB(indicator1 ~ defined_populations_simplified + n_extant_populations + defined_populations_simplified*n_extant_populations + (1|country_assessment), family = "ordbeta", data = data_for_model)
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, : NA/
## NaN function evaluation

Summary:

summary(msupB2)
##  Family: ordbeta  ( logit )
## Formula:          
## indicator1 ~ defined_populations_simplified + n_extant_populations +  
##     defined_populations_simplified * n_extant_populations + (1 |  
##     country_assessment)
## Data: data_for_model
## 
##      AIC      BIC   logLik deviance df.resid 
##   1073.3   1187.0   -510.7   1021.3      559 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.08117  0.2849  
## Number of obs: 585, groups:  country_assessment, 9
## 
## Dispersion parameter for ordbeta family (): 4.61 
## 
## Conditional model:
##                                                                                              Estimate
## (Intercept)                                                                                 -0.852589
## defined_populations_simplifieddispersal_buffer                                               0.334354
## defined_populations_simplifiedeco_biogeo_proxies                                             0.153242
## defined_populations_simplifiedgenetic_clusters                                               1.245572
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                            1.039658
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                         0.679947
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                       0.657931
## defined_populations_simplifiedgeographic_boundaries management_units                         0.977634
## defined_populations_simplifiedmanagement_units                                               0.393156
## defined_populations_simplifiedother                                                         -0.833994
## defined_populations_simplifiedother_combinations                                             0.453543
## n_extant_populations                                                                        -0.001760
## defined_populations_simplifieddispersal_buffer:n_extant_populations                         -0.003768
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations                       -0.013816
## defined_populations_simplifiedgenetic_clusters:n_extant_populations                         -0.173984
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations       0.010480
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations   -0.015563
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations -0.111981
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations   -0.029675
## defined_populations_simplifiedmanagement_units:n_extant_populations                         -0.054893
## defined_populations_simplifiedother:n_extant_populations                                     0.726473
## defined_populations_simplifiedother_combinations:n_extant_populations                       -0.003624
##                                                                                             Std. Error
## (Intercept)                                                                                   0.184087
## defined_populations_simplifieddispersal_buffer                                                0.281372
## defined_populations_simplifiedeco_biogeo_proxies                                              0.405021
## defined_populations_simplifiedgenetic_clusters                                                0.359940
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                             0.628292
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                          0.305385
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                        0.492350
## defined_populations_simplifiedgeographic_boundaries management_units                          0.412312
## defined_populations_simplifiedmanagement_units                                                0.606730
## defined_populations_simplifiedother                                                           1.578475
## defined_populations_simplifiedother_combinations                                              0.205909
## n_extant_populations                                                                          0.002589
## defined_populations_simplifieddispersal_buffer:n_extant_populations                           0.004387
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations                         0.011100
## defined_populations_simplifiedgenetic_clusters:n_extant_populations                           0.064272
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations        0.084200
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations     0.022474
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations   0.058478
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations     0.028637
## defined_populations_simplifiedmanagement_units:n_extant_populations                           0.067364
## defined_populations_simplifiedother:n_extant_populations                                      0.792738
## defined_populations_simplifiedother_combinations:n_extant_populations                         0.003936
##                                                                                             z value
## (Intercept)                                                                                  -4.631
## defined_populations_simplifieddispersal_buffer                                                1.188
## defined_populations_simplifiedeco_biogeo_proxies                                              0.378
## defined_populations_simplifiedgenetic_clusters                                                3.460
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                             1.655
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                          2.227
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                        1.336
## defined_populations_simplifiedgeographic_boundaries management_units                          2.371
## defined_populations_simplifiedmanagement_units                                                0.648
## defined_populations_simplifiedother                                                          -0.528
## defined_populations_simplifiedother_combinations                                              2.203
## n_extant_populations                                                                         -0.680
## defined_populations_simplifieddispersal_buffer:n_extant_populations                          -0.859
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations                        -1.245
## defined_populations_simplifiedgenetic_clusters:n_extant_populations                          -2.707
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations        0.124
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations    -0.693
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations  -1.915
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations    -1.036
## defined_populations_simplifiedmanagement_units:n_extant_populations                          -0.815
## defined_populations_simplifiedother:n_extant_populations                                      0.916
## defined_populations_simplifiedother_combinations:n_extant_populations                        -0.921
##                                                                                             Pr(>|z|)
## (Intercept)                                                                                 3.63e-06
## defined_populations_simplifieddispersal_buffer                                              0.234716
## defined_populations_simplifiedeco_biogeo_proxies                                            0.705167
## defined_populations_simplifiedgenetic_clusters                                              0.000539
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                           0.097978
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                        0.025979
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                      0.181449
## defined_populations_simplifiedgeographic_boundaries management_units                        0.017735
## defined_populations_simplifiedmanagement_units                                              0.516991
## defined_populations_simplifiedother                                                         0.597254
## defined_populations_simplifiedother_combinations                                            0.027620
## n_extant_populations                                                                        0.496614
## defined_populations_simplifieddispersal_buffer:n_extant_populations                         0.390425
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations                       0.213261
## defined_populations_simplifiedgenetic_clusters:n_extant_populations                         0.006790
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations      0.900943
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations   0.488612
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations 0.055503
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations   0.300085
## defined_populations_simplifiedmanagement_units:n_extant_populations                         0.415149
## defined_populations_simplifiedother:n_extant_populations                                    0.359452
## defined_populations_simplifiedother_combinations:n_extant_populations                       0.357200
##                                                                                                
## (Intercept)                                                                                 ***
## defined_populations_simplifieddispersal_buffer                                                 
## defined_populations_simplifiedeco_biogeo_proxies                                               
## defined_populations_simplifiedgenetic_clusters                                              ***
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                           .  
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                        *  
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                         
## defined_populations_simplifiedgeographic_boundaries management_units                        *  
## defined_populations_simplifiedmanagement_units                                                 
## defined_populations_simplifiedother                                                            
## defined_populations_simplifiedother_combinations                                            *  
## n_extant_populations                                                                           
## defined_populations_simplifieddispersal_buffer:n_extant_populations                            
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations                          
## defined_populations_simplifiedgenetic_clusters:n_extant_populations                         ** 
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations         
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations      
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations .  
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations      
## defined_populations_simplifiedmanagement_units:n_extant_populations                            
## defined_populations_simplifiedother:n_extant_populations                                       
## defined_populations_simplifiedother_combinations:n_extant_populations                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Because “what’s a population and how do you define them?” is such an important question, we can also test the effect of methods alone. First, subset the data to only those taxa where a single method was used:

ind1_single_methods<-indicators_full %>% 
                      filter(!is.na(indicator1)) %>% 
                      filter(n_extant_populations<500) %>%  # doesn't make a difference in the test below, but useful for 
                      filter(defined_populations_simplified=="genetic_clusters" | 
                             defined_populations_simplified=="geographic_boundaries" |
                             defined_populations_simplified=="eco_biogeo_proxies" | 
                             defined_populations_simplified=="management_units" |
                             defined_populations_simplified=="dispersal_buffer")


# check number of methods
length(unique(ind1_single_methods$defined_populations_simplified))
## [1] 5
# check n by method
table(ind1_single_methods$defined_populations_simplified)
## 
##      dispersal_buffer    eco_biogeo_proxies      genetic_clusters 
##                   138                    18                    58 
## geographic_boundaries      management_units 
##                   159                    13
# check n total
nrow(ind1_single_methods)
## [1] 386
# re-level to use geographic boundaries as reference category for the analysis
ind1_single_methods$defined_populations_simplified<-relevel(as.factor(ind1_single_methods$defined_populations_simplified),
                                                       ref="geographic_boundaries")

Run model:

# run model
msupB3 <- glmmTMB(indicator1 ~ n_extant_populations + defined_populations_simplified +        n_extant_populations*defined_populations_simplified + (1|country_assessment), family = "ordbeta", data = ind1_single_methods)
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, : NA/
## NaN function evaluation

Summary:

summary(msupB3)
##  Family: ordbeta  ( logit )
## Formula:          
## indicator1 ~ n_extant_populations + defined_populations_simplified +  
##     n_extant_populations * defined_populations_simplified + (1 |  
##     country_assessment)
## Data: ind1_single_methods
## 
##      AIC      BIC   logLik deviance df.resid 
##    696.5    751.9   -334.3    668.5      372 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.06442  0.2538  
## Number of obs: 386, groups:  country_assessment, 8
## 
## Dispersion parameter for ordbeta family (): 3.82 
## 
## Conditional model:
##                                                                         Estimate
## (Intercept)                                                           -0.8202422
## n_extant_populations                                                  -0.0004588
## defined_populations_simplifieddispersal_buffer                         0.2073110
## defined_populations_simplifiedeco_biogeo_proxies                       0.1552506
## defined_populations_simplifiedgenetic_clusters                         1.1421148
## defined_populations_simplifiedmanagement_units                         0.3522263
## n_extant_populations:defined_populations_simplifieddispersal_buffer   -0.0035240
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies -0.0119132
## n_extant_populations:defined_populations_simplifiedgenetic_clusters   -0.1530545
## n_extant_populations:defined_populations_simplifiedmanagement_units   -0.0501060
##                                                                       Std. Error
## (Intercept)                                                            0.2069251
## n_extant_populations                                                   0.0026755
## defined_populations_simplifieddispersal_buffer                         0.4011019
## defined_populations_simplifiedeco_biogeo_proxies                       0.4211192
## defined_populations_simplifiedgenetic_clusters                         0.3761327
## defined_populations_simplifiedmanagement_units                         0.6106084
## n_extant_populations:defined_populations_simplifieddispersal_buffer    0.0043892
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies  0.0110470
## n_extant_populations:defined_populations_simplifiedgenetic_clusters    0.0678790
## n_extant_populations:defined_populations_simplifiedmanagement_units    0.0661245
##                                                                       z value
## (Intercept)                                                            -3.964
## n_extant_populations                                                   -0.171
## defined_populations_simplifieddispersal_buffer                          0.517
## defined_populations_simplifiedeco_biogeo_proxies                        0.369
## defined_populations_simplifiedgenetic_clusters                          3.036
## defined_populations_simplifiedmanagement_units                          0.577
## n_extant_populations:defined_populations_simplifieddispersal_buffer    -0.803
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies  -1.078
## n_extant_populations:defined_populations_simplifiedgenetic_clusters    -2.255
## n_extant_populations:defined_populations_simplifiedmanagement_units    -0.758
##                                                                       Pr(>|z|)
## (Intercept)                                                           7.37e-05
## n_extant_populations                                                   0.86384
## defined_populations_simplifieddispersal_buffer                         0.60526
## defined_populations_simplifiedeco_biogeo_proxies                       0.71238
## defined_populations_simplifiedgenetic_clusters                         0.00239
## defined_populations_simplifiedmanagement_units                         0.56404
## n_extant_populations:defined_populations_simplifieddispersal_buffer    0.42205
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies  0.28085
## n_extant_populations:defined_populations_simplifiedgenetic_clusters    0.02415
## n_extant_populations:defined_populations_simplifiedmanagement_units    0.44860
##                                                                          
## (Intercept)                                                           ***
## n_extant_populations                                                     
## defined_populations_simplifieddispersal_buffer                           
## defined_populations_simplifiedeco_biogeo_proxies                         
## defined_populations_simplifiedgenetic_clusters                        ** 
## defined_populations_simplifiedmanagement_units                           
## n_extant_populations:defined_populations_simplifieddispersal_buffer      
## n_extant_populations:defined_populations_simplifiedeco_biogeo_proxies    
## n_extant_populations:defined_populations_simplifiedgenetic_clusters   *  
## n_extant_populations:defined_populations_simplifiedmanagement_units      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Finally, we tested for associations between range type on Ne>500 indicator.

First filter data to consider only wide ranging and restricted categories (ie remove unknown due to small sampling size)

## Remove unknown
data<- indicators_averaged_one  %>%
                    filter(!is.na(indicator1_mean)) %>% 
                    filter(species_range !="unknown")

# summary of indicator
summary(data$indicator1_mean)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.2698  0.5000  1.0000
# re-level to use geographic boundaries as reference category for the analysis
data$defined_populations_simplified<-relevel(as.factor(data$defined_populations_simplified),
                                                       ref="geographic_boundaries")

# make sure specis range is a factor
data$species_range<-as.factor(data$species_range)

Is there still an effect of method on indicator1 if we control for species range?

## run model  + country
m.c3 <- glmmTMB(indicator1_mean ~ defined_populations_simplified + species_range + (1|country_assessment), family = "ordbeta", data = data)


# summary results
summary(m.c3)
##  Family: ordbeta  ( logit )
## Formula:          
## indicator1_mean ~ defined_populations_simplified + species_range +  
##     (1 | country_assessment)
## Data: data
## 
##      AIC      BIC   logLik deviance df.resid 
##    996.1   1065.0   -482.1    964.1      530 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.1099   0.3316  
## Number of obs: 546, groups:  country_assessment, 9
## 
## Dispersion parameter for ordbeta family (): 3.86 
## 
## Conditional model:
##                                                                        Estimate
## (Intercept)                                                             -1.0882
## defined_populations_simplifieddispersal_buffer                           0.1366
## defined_populations_simplifiedeco_biogeo_proxies                        -0.2520
## defined_populations_simplifiedgenetic_clusters                           0.6945
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        0.8027
## defined_populations_simplifiedgenetic_clusters geographic_boundaries     0.3713
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  -0.2095
## defined_populations_simplifiedgeographic_boundaries management_units     0.3842
## defined_populations_simplifiedmanagement_units                          -0.1487
## defined_populations_simplifiedother                                      1.0211
## defined_populations_simplifiedother_combinations                         0.2036
## species_rangewide ranging                                                0.5804
##                                                                        Std. Error
## (Intercept)                                                                0.1954
## defined_populations_simplifieddispersal_buffer                             0.2835
## defined_populations_simplifiedeco_biogeo_proxies                           0.3498
## defined_populations_simplifiedgenetic_clusters                             0.2596
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies          0.4535
## defined_populations_simplifiedgenetic_clusters geographic_boundaries       0.2513
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies     0.3640
## defined_populations_simplifiedgeographic_boundaries management_units       0.3077
## defined_populations_simplifiedmanagement_units                             0.4332
## defined_populations_simplifiedother                                        0.6165
## defined_populations_simplifiedother_combinations                           0.2106
## species_rangewide ranging                                                  0.1359
##                                                                        z value
## (Intercept)                                                             -5.569
## defined_populations_simplifieddispersal_buffer                           0.482
## defined_populations_simplifiedeco_biogeo_proxies                        -0.720
## defined_populations_simplifiedgenetic_clusters                           2.676
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        1.770
## defined_populations_simplifiedgenetic_clusters geographic_boundaries     1.478
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  -0.576
## defined_populations_simplifiedgeographic_boundaries management_units     1.248
## defined_populations_simplifiedmanagement_units                          -0.343
## defined_populations_simplifiedother                                      1.656
## defined_populations_simplifiedother_combinations                         0.967
## species_rangewide ranging                                                4.270
##                                                                        Pr(>|z|)
## (Intercept)                                                            2.56e-08
## defined_populations_simplifieddispersal_buffer                          0.62996
## defined_populations_simplifiedeco_biogeo_proxies                        0.47126
## defined_populations_simplifiedgenetic_clusters                          0.00746
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       0.07673
## defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.13950
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  0.56493
## defined_populations_simplifiedgeographic_boundaries management_units    0.21187
## defined_populations_simplifiedmanagement_units                          0.73148
## defined_populations_simplifiedother                                     0.09768
## defined_populations_simplifiedother_combinations                        0.33362
## species_rangewide ranging                                              1.96e-05
##                                                                           
## (Intercept)                                                            ***
## defined_populations_simplifieddispersal_buffer                            
## defined_populations_simplifiedeco_biogeo_proxies                          
## defined_populations_simplifiedgenetic_clusters                         ** 
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      .  
## defined_populations_simplifiedgenetic_clusters geographic_boundaries      
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    
## defined_populations_simplifiedgeographic_boundaries management_units      
## defined_populations_simplifiedmanagement_units                            
## defined_populations_simplifiedother                                    .  
## defined_populations_simplifiedother_combinations                          
## species_rangewide ranging                                              ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Finally, we tested interactions between method and species range, to determine whether the effect of species range only applies when some methods are used.

Is the effect of method on Ne indicator moderated by species range?

## run model 
m.c4 <- glmmTMB(indicator1_mean ~ defined_populations_simplified + species_range + defined_populations_simplified*species_range + (1|country_assessment), family = "ordbeta", data = data)


# summary results
summary(m.c4)
##  Family: ordbeta  ( logit )
## Formula:          
## indicator1_mean ~ defined_populations_simplified + species_range +  
##     defined_populations_simplified * species_range + (1 | country_assessment)
## Data: data
## 
##      AIC      BIC   logLik deviance df.resid 
##    986.3   1098.2   -467.2    934.3      520 
## 
## Random effects:
## 
## Conditional model:
##  Groups             Name        Variance Std.Dev.
##  country_assessment (Intercept) 0.1448   0.3806  
## Number of obs: 546, groups:  country_assessment, 9
## 
## Dispersion parameter for ordbeta family (): 4.28 
## 
## Conditional model:
##                                                                                                    Estimate
## (Intercept)                                                                                      -1.132e+00
## defined_populations_simplifieddispersal_buffer                                                    3.897e-01
## defined_populations_simplifiedeco_biogeo_proxies                                                  1.159e-01
## defined_populations_simplifiedgenetic_clusters                                                    2.089e-01
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                -1.694e+01
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                             -6.287e-02
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                           -4.092e-01
## defined_populations_simplifiedgeographic_boundaries management_units                              5.794e-01
## defined_populations_simplifiedmanagement_units                                                   -2.115e+01
## defined_populations_simplifiedother                                                               5.774e-01
## defined_populations_simplifiedother_combinations                                                  7.044e-01
## species_rangewide ranging                                                                         6.077e-01
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging                         -3.304e-01
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging                       -9.104e-01
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging                          9.516e-01
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging       1.800e+01
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging    7.208e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging  4.872e-01
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging   -3.476e-01
## defined_populations_simplifiedmanagement_units:species_rangewide ranging                          2.125e+01
## defined_populations_simplifiedother:species_rangewide ranging                                     2.562e+01
## defined_populations_simplifiedother_combinations:species_rangewide ranging                       -8.039e-01
##                                                                                                  Std. Error
## (Intercept)                                                                                       2.139e-01
## defined_populations_simplifieddispersal_buffer                                                    3.600e-01
## defined_populations_simplifiedeco_biogeo_proxies                                                  4.208e-01
## defined_populations_simplifiedgenetic_clusters                                                    3.562e-01
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                 4.435e+03
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                              3.843e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                            4.536e-01
## defined_populations_simplifiedgeographic_boundaries management_units                              4.105e-01
## defined_populations_simplifiedmanagement_units                                                    2.239e+04
## defined_populations_simplifiedother                                                               6.554e-01
## defined_populations_simplifiedother_combinations                                                  2.766e-01
## species_rangewide ranging                                                                         2.563e-01
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging                          3.722e-01
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging                        6.851e-01
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging                          5.013e-01
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging       4.435e+03
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging    5.237e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging  7.121e-01
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging    6.100e-01
## defined_populations_simplifiedmanagement_units:species_rangewide ranging                          2.239e+04
## defined_populations_simplifiedother:species_rangewide ranging                                     1.821e+05
## defined_populations_simplifiedother_combinations:species_rangewide ranging                        3.930e-01
##                                                                                                  z value
## (Intercept)                                                                                       -5.294
## defined_populations_simplifieddispersal_buffer                                                     1.082
## defined_populations_simplifiedeco_biogeo_proxies                                                   0.275
## defined_populations_simplifiedgenetic_clusters                                                     0.586
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                 -0.004
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                              -0.164
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                            -0.902
## defined_populations_simplifiedgeographic_boundaries management_units                               1.411
## defined_populations_simplifiedmanagement_units                                                    -0.001
## defined_populations_simplifiedother                                                                0.881
## defined_populations_simplifiedother_combinations                                                   2.547
## species_rangewide ranging                                                                          2.371
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging                          -0.888
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging                        -1.329
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging                           1.898
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging        0.004
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging     1.376
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging   0.684
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging    -0.570
## defined_populations_simplifiedmanagement_units:species_rangewide ranging                           0.001
## defined_populations_simplifiedother:species_rangewide ranging                                      0.000
## defined_populations_simplifiedother_combinations:species_rangewide ranging                        -2.046
##                                                                                                  Pr(>|z|)
## (Intercept)                                                                                       1.2e-07
## defined_populations_simplifieddispersal_buffer                                                     0.2791
## defined_populations_simplifiedeco_biogeo_proxies                                                   0.7830
## defined_populations_simplifiedgenetic_clusters                                                     0.5576
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                  0.9970
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                               0.8701
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                             0.3670
## defined_populations_simplifiedgeographic_boundaries management_units                               0.1581
## defined_populations_simplifiedmanagement_units                                                     0.9992
## defined_populations_simplifiedother                                                                0.3783
## defined_populations_simplifiedother_combinations                                                   0.0109
## species_rangewide ranging                                                                          0.0178
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging                           0.3747
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging                         0.1839
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging                           0.0577
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging        0.9968
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging     0.1687
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging   0.4939
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging     0.5688
## defined_populations_simplifiedmanagement_units:species_rangewide ranging                           0.9992
## defined_populations_simplifiedother:species_rangewide ranging                                      0.9999
## defined_populations_simplifiedother_combinations:species_rangewide ranging                         0.0408
##                                                                                                     
## (Intercept)                                                                                      ***
## defined_populations_simplifieddispersal_buffer                                                      
## defined_populations_simplifiedeco_biogeo_proxies                                                    
## defined_populations_simplifiedgenetic_clusters                                                      
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                   
## defined_populations_simplifiedgenetic_clusters geographic_boundaries                                
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                              
## defined_populations_simplifiedgeographic_boundaries management_units                                
## defined_populations_simplifiedmanagement_units                                                      
## defined_populations_simplifiedother                                                                 
## defined_populations_simplifiedother_combinations                                                 *  
## species_rangewide ranging                                                                        *  
## defined_populations_simplifieddispersal_buffer:species_rangewide ranging                            
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging                          
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging                         .  
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging         
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging      
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging    
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging      
## defined_populations_simplifiedmanagement_units:species_rangewide ranging                            
## defined_populations_simplifiedother:species_rangewide ranging                                       
## defined_populations_simplifiedother_combinations:species_rangewide ranging                       *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Main Figure (representing the models above): Single plot 5 panels. Boxplots plots for the effect of method on: number of populations, proportion of maintained populations (indicator 2) and Proportion of populations with Ne>500 (indicator 1), AND Violin plots for the distribution of the indicator values by range type.

Top a,b,c panel boxplots:

##### plot for Proportion of maintained populations (indicator 2) only with n in axis labels

# sample size 
sample_size <- indicators_full %>%
                    filter(!is.na(indicator2)) %>% 
                    filter(n_extant_populations<500) %>% 
                    group_by(defined_populations_nicenames) %>% summarize(num=n())

# custom axis
## new dataframe
df<-indicators_full %>% 
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator2)) %>% 
    # add sampling size 
  left_join(sample_size) %>%
  mutate(myaxis = as.factor(paste0(defined_populations_nicenames, " (n= ", num, ")")))
## Joining, by = "defined_populations_nicenames"
pb.1<- df %>%
  filter(n_extant_populations<500) %>%
  ggplot(aes(x=myaxis, y=indicator2, color=defined_populations_nicenames,    
                                     fill=defined_populations_nicenames)) +
          geom_boxplot() + xlab("") + ylab("Proportion of maintained populations within species") +
          geom_jitter(size=.4, width = 0.1, color="black") +
  coord_flip() +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="none",
        plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm")) + # this is used to decrease the space between plots) 
  scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  
  scale_x_discrete(limits=rev, 
                   labels= rev(sub(".*(\\(n= \\d+\\))", "\\1", levels(df$myaxis)))) + # extract "(n = number)") and show them in reverse order
  theme(text = element_text(size = 13))


##### plot for Proportion populations Ne>500 (indicator 1) only with n in axis labels
# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
  filter(!is.na(indicator1)) %>% 
  filter(n_extant_populations<500) %>% 
  group_by(defined_populations_nicenames) %>% summarize(num=n())

# custom axis
## new dataframe
df<-indicators_full %>% 
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator1)) %>% 
  # add sampling size 
  left_join(sample_size) %>%
  mutate(myaxis = as.factor(paste0(defined_populations_nicenames, " (n= ", num, ")")))
## Joining, by = "defined_populations_nicenames"
## plot 
pc.1<- df %>%
  ggplot(aes(x=myaxis, y=indicator1, color=defined_populations_nicenames,    
             fill=defined_populations_nicenames)) +
  geom_boxplot() + xlab("") + ylab("Proportion of populations within species with Ne>500") +
  geom_jitter(size=.4, width = 0.1, color="black") +
  coord_flip() +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="none",
        plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm")) + # this is used to decrease the space between plots) 
  scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_color_manual(values=simplifiedmethods_colors,
                     breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_x_discrete(limits=rev, 
                   labels= rev(sub(".*(\\(n= \\d+\\))", "\\1", levels(df$myaxis)))) + # extract "(n = number)") and show them in reverse order
  theme(text = element_text(size = 13))


## Plot 3 panels
plot_grid(pa, pb.1, pc.1, ncol=3, rel_widths = c(1.9,1,1), align = "h", labels=c("a)", "b)", "c)"))  

Bottom d, e violin plots. Indicators by of range type coloring points to show genetic clusters

For PM indicator:

# add variable stating if genetic methods are used
indicators_averaged_one<- indicators_averaged_one  %>%
mutate(genetic_to_define_pops = ifelse(grepl("genetic", defined_populations_simplified), 'genetic method', 'non genetic'))


# get sample size by desired category
sample_size <- indicators_averaged_one  %>%
                    filter(!is.na(indicator2_mean)) %>% 
                    filter(!is.na(species_range)) %>% 
                    group_by(species_range) %>% summarize(num=n())

# plot
pd<-indicators_averaged_one %>% 
    filter(!is.na(indicator2_mean)) %>% 
    filter(!is.na(species_range)) %>% 
  
  # add sampling size 
  left_join(sample_size) %>% 
  mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%

  # plot
  ggplot(aes(x=myaxis, y=indicator2_mean)) +
      geom_violin(width=1, linewidth = 0, fill="grey70")  +
      xlab("") + ylab("Proportion of maintained populations within species") +
      coord_flip() +
    new_scale_color() + # to color points without confuisng ggplot
    geom_jitter(size=1.2, width = 0.1, aes(color = genetic_to_define_pops)) +
    scale_color_manual(values=c("red", "black")) +
    labs(color=NULL) + # hide legend title
      theme_light() +
      theme(panel.border = element_blank(), legend.position="none", text= element_text(size=20))
## Joining, by = "species_range"

For Ne indicator:

# add variable stating if genetic methods are used
indicators_averaged_one<- indicators_averaged_one  %>%
mutate(genetic_to_define_pops = ifelse(grepl("genetic", defined_populations_simplified), 'genetic method', 'non genetic'))


# get sample size by desired category
sample_size <- indicators_averaged_one  %>%
                    filter(!is.na(indicator1_mean)) %>% 
                    filter(!is.na(species_range)) %>% 
                    group_by(species_range) %>% summarize(num=n())

# plot
pe <- indicators_averaged_one %>% 
    filter(!is.na(indicator1_mean)) %>% 
    filter(!is.na(species_range)) %>% 
  
  # add sampling size 
  left_join(sample_size) %>% 
  mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%

  # plot
  ggplot(aes(x=myaxis, y=indicator1_mean)) +
      geom_violin(width=1, linewidth = 0, fill="grey70")  +
      xlab("") + ylab("Proportion of populations within species with Ne>500") +
      coord_flip() +
    new_scale_color() + # to color the points without confusing ggplot
    geom_jitter(size=1.2, width = 0.1, aes(color = genetic_to_define_pops)) +
    scale_color_manual(values=c("red", "black")) + 
    theme_light() +
    labs(color="Method to \ndefine populations") + # nicer legend title
    theme(panel.border = element_blank(), legend.position="right", text= element_text(size=20))
## Joining, by = "species_range"

Two panel figure:

plot_grid(pd + theme(legend.position = "non2"), # legend can be shown only below both plots 
          pe,
          ncol = 2,
          rel_widths = c(1,1.4), align = "h", labels=c("d)", "e)"))

Supplementary Figure: Single figure 2 panels scatter plots number of populations vs indicators

# plot
plot_grid(psupA + xlim(0,400) + xlab(""), # remove xlab from top plot and match x axis size
          psupB+ xlim(0,400), 
          ncol=1, align = "v", labels=c("a)", "b)"))  

Indicatros by threat status (IUCN Red List)

All the following plots and analyses consider the average of multiassessed species (variable _mean), so that they are shown only once.

(a) Ne > 500 indicator and red list status

Plot indicator 1 by global IUCN in the entire dataset:

## Global IUCN
## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
               filter(!is.na(indicator1_mean)) %>% 
               filter(!is.na(global_IUCN)) %>% 
               group_by(global_IUCN) %>% summarize(num=n())

# new df 
df<- indicators_averaged_one %>% 
     filter(!is.na(indicator1_mean)) %>% 
     filter(!is.na(global_IUCN)) %>% 
        # add sampling size 
        left_join(sample_size) %>%
        mutate(myaxis = paste0(global_IUCN, " (n= ", num, ")"))
## Joining, by = "global_IUCN"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis, 
                  #grep is used below to get the sample size, which may change depending on the data
                  levels=c(grep("cr", unique(df$myaxis), value = TRUE),
                          grep("en", unique(df$myaxis), value = TRUE),
                          grep("vu", unique(df$myaxis), value = TRUE),
                          grep("nt", unique(df$myaxis), value = TRUE),
                          grep("lc", unique(df$myaxis), value = TRUE),
                          grep("dd", unique(df$myaxis), value = TRUE),
                          grep("not_assessed", unique(df$myaxis), value = TRUE),
                          grep("unknown", unique(df$myaxis), value = TRUE)))

df$global_IUCN<-factor(df$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))

      
# plot
p1<-df %>%
    ggplot(aes(x=myaxis, y=indicator1_mean , fill=global_IUCN)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of populations within species with Ne>500") +
      coord_flip() +
      scale_fill_manual(values= IUCNcolors, # iucn color codes
                        breaks=c(levels(df$global_IUCN))) +
      scale_x_discrete(limits=rev) +
      theme_light() +
      theme(panel.border = element_blank(), legend.position="none", 
            plot.title = element_text(hjust = 0.5), # center title
            text= element_text(size=15))
p1

Summary table:

x  <- indicators_averaged_one %>%
               filter(!is.na(indicator1_mean)) %>% 
               filter(!is.na(global_IUCN)) %>% 
               group_by(global_IUCN) %>% 
               summarize(n=n(),
                         mean=mean(indicator1_mean),
                         median=median(indicator1_mean),
                         per.0=sum(indicator1_mean==0) / n *100,
                         per.below.25=sum(indicator1_mean<0.25) / n *100,
                         per.below.90=sum(indicator1_mean<0.90) / n *100,
                         per.above.75=sum(indicator1_mean>0.75)/ n *100,
                         per1=sum(indicator1_mean==1) / n *100)


kable(x, digits=2)
global_IUCN n mean median per.0 per.below.25 per.below.90 per.above.75 per1
cr 46 0.11 0.00 84.78 86.96 93.48 6.52 6.52
dd 10 0.44 0.21 40.00 50.00 60.00 40.00 40.00
en 48 0.25 0.00 66.67 70.83 81.25 18.75 18.75
lc 186 0.37 0.05 47.31 55.91 72.04 29.03 27.96
not_assessed 156 0.19 0.00 63.46 74.36 91.03 8.97 8.97
nt 51 0.24 0.00 54.90 72.55 84.31 15.69 15.69
unknown 3 0.67 1.00 33.33 33.33 33.33 66.67 66.67
vu 66 0.32 0.00 56.06 59.09 78.79 22.73 21.21

Indicator 1 by country and global IUCN

## change order of levels so that categories match with the order of colors
indicators_averaged_one$global_IUCN<-factor(indicators_averaged_one$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))


# plot
indicators_averaged_one %>% 
  filter(!is.na(regional_redlist)) %>%
  # plot
  ggplot(aes(x=global_IUCN, y=indicator1_mean, fill=global_IUCN)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of populations within species with Ne>500") +
      coord_flip() +
      scale_fill_manual(values= IUCNcolors, # iucn color codes
                    breaks=c(levels(indicators_averaged_one$global_IUCN))) +
      scale_x_discrete(limits=rev) +
      theme_light() +
      ggtitle("global IUCN Redlist") +
      theme(panel.border = element_blank(), legend.position="none", 
            plot.title = element_text(hjust = 0.5), # center title
            text= element_text(size=13)) +
      facet_wrap(~country_assessment, ncol = 3) +
      theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 342 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 342 rows containing missing values (`geom_point()`).

Indicator1 by regional IUCN Redlist, excluding US, Australia and Mexico becasue they don’t have a regional IUCN redlist.

## change order of levels so that categories match with the order of colors
indicators_averaged_one$regional_redlist<-factor(indicators_averaged_one$regional_redlist, levels=c("re","cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))

# plot
indicators_averaged_one %>% 
  # filter US and Mx
  filter(country_assessment %!in% c("Mexico", "US", "Australia")) %>%
  filter(!is.na(regional_redlist)) %>%
  
  # plot
  ggplot(aes(x=regional_redlist, y=indicator1_mean, fill=regional_redlist)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of populations within species with Ne>500") +
      coord_flip() +
      scale_fill_manual(values= IUCNcolors_regional, # iucn color codes
                    breaks=c(levels(indicators_averaged_one$regional_redlist))) +
      scale_x_discrete(limits=rev) +
      theme_light() +
      ggtitle("regional IUCN Redlist") +
      theme(panel.border = element_blank(), legend.position="none", 
            plot.title = element_text(hjust = 0.5), # center title
            text= element_text(size=15)) +
      facet_wrap(~country_assessment, ncol = 3) +
      theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 171 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 171 rows containing missing values (`geom_point()`).

(b) Proportion of Maintained Populations and red list status?

Plot indicator 2 by global IUCN in the entire dataset:

## Global IUCN
## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
               filter(!is.na(indicator2_mean)) %>% 
               filter(!is.na(global_IUCN)) %>% 
               group_by(global_IUCN) %>% summarize(num=n())

# new df 
df<- indicators_averaged_one %>% 
     filter(!is.na(indicator2_mean)) %>% 
     filter(!is.na(global_IUCN)) %>% 
        # add sampling size 
        left_join(sample_size) %>%
        mutate(myaxis = paste0(global_IUCN, " (n= ", num, ")"))
## Joining, by = "global_IUCN"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis, 
                  #grep is used below to get the sample size, which may change depending on the data
                  levels=c(grep("cr", unique(df$myaxis), value = TRUE),
                          grep("en", unique(df$myaxis), value = TRUE),
                          grep("vu", unique(df$myaxis), value = TRUE),
                          grep("nt", unique(df$myaxis), value = TRUE),
                          grep("lc", unique(df$myaxis), value = TRUE),
                          grep("dd", unique(df$myaxis), value = TRUE),
                          grep("not_assessed", unique(df$myaxis), value = TRUE),
                          grep("unknown", unique(df$myaxis), value = TRUE)))

df$global_IUCN<-factor(df$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))

      
# plot
p2<-df %>%
    ggplot(aes(x=myaxis, y=indicator2 , fill=global_IUCN)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of maintained populations within species") +
      coord_flip() +
      scale_fill_manual(values= IUCNcolors, # iucn color codes
                        breaks=c(levels(df$global_IUCN))) +
      scale_x_discrete(limits=rev) +
      theme_light() +
      theme(panel.border = element_blank(), legend.position="none", 
            plot.title = element_text(hjust = 0.5), # center title
            text= element_text(size=15))
p2
## Warning: Removed 2 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).

Summary table:

x  <- indicators_averaged_one %>%
  filter(!is.na(indicator2_mean)) %>% 
  filter(!is.na(global_IUCN)) %>% 
  group_by(global_IUCN) %>% 
  summarize(n=n(),
            mean=mean(indicator2_mean),
            median=median(indicator2_mean),
            per.0=sum(indicator2_mean==0) / n *100,
            per.below.25=sum(indicator2_mean<0.25) / n *100,
            per.below.90=sum(indicator2_mean<0.90) / n *100,
            per.above.75=sum(indicator2_mean>0.75)/ n *100,
            per1=sum(indicator2_mean==1) / n *100)


kable(x, digits=2)
global_IUCN n mean median per.0 per.below.25 per.below.90 per.above.75 per1
cr 36 0.83 1.00 0.00 5.56 36.11 75.00 63.89
en 59 0.79 0.86 0.00 1.69 50.85 61.02 49.15
vu 65 0.78 0.91 1.54 3.08 49.23 60.00 44.62
nt 42 0.83 1.00 0.00 4.76 38.10 71.43 57.14
lc 152 0.84 1.00 0.66 3.29 34.21 72.37 61.18
dd 9 0.71 0.83 0.00 0.00 66.67 66.67 33.33
not_assessed 153 0.84 0.95 0.65 1.96 40.52 69.93 48.37
unknown 2 1.00 1.00 0.00 0.00 0.00 100.00 100.00

Indicator 2 by country and global IUCN

## change order of levels so that categories match with the order of colors
indicators_averaged_one$global_IUCN<-factor(indicators_averaged_one$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))


# plot
indicators_averaged_one %>% 
  filter(!is.na(regional_redlist)) %>%
  # plot
  ggplot(aes(x=global_IUCN, y=indicator2_mean, fill=global_IUCN)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of maintained populations within species") +
      coord_flip() +
      scale_fill_manual(values= IUCNcolors, # iucn color codes
                    breaks=c(levels(indicators_averaged_one$global_IUCN))) +
      scale_x_discrete(limits=rev) +
      theme_light() +
      ggtitle("global IUCN Redlist") +
      theme(panel.border = element_blank(), legend.position="none", 
            plot.title = element_text(hjust = 0.5), # center title
            text= element_text(size=13)) +
      facet_wrap(~country_assessment, ncol = 3) +
      theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 390 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 390 rows containing missing values (`geom_point()`).

Main Figure: Single plot 2 pannels IUCN redlist and indicator range values

plot_grid(p1,
          p2,
          ncol=1, align = "v", labels=c("a)", "b)"))  
## Warning: Removed 2 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).

Comparing the Ne indicator against a mock IUCN assessment adding up all populations

Generate mock Ne data for the entire species, by adding up the Ne of each population within each species.

# Sum the Ne of each population within spp
x <- ind1_data %>% group_by(X_uuid, taxon, country_assessment, multiassessment) %>% # this groups by individual species, considering mutliassessed species
              # sum Ne by individual species, keeping multiassesments separate
              summarise(Ne_mock_species = sum(Ne_combined, na.rm = TRUE))   %>%

              # average for multiassessed records in a single species
              group_by(country_assessment, multiassessment, taxon) %>% 
              summarise(Ne_mock_species=mean(Ne_mock_species, na.rm=TRUE))  
## `summarise()` has grouped output by 'X_uuid', 'taxon', 'country_assessment'.
## You can override using the `.groups` argument.
## `summarise()` has grouped output by 'country_assessment', 'multiassessment'.
## You can override using the `.groups` argument.
# Add mto indicator data
indicators_averaged_one<- left_join(indicators_averaged_one, x) %>%

  # add a below above category
  mutate(Ne_mock_category = ifelse(Ne_mock_species > 500, 'Above 500', 'Below 500'))
## Joining, by = c("country_assessment", "taxon", "multiassessment")
indicators_averaged_one %>% select(taxon, country_assessment, Ne_mock_species, Ne_mock_category) %>% head()
## Adding missing grouping variables: `multiassessment`

Plot the Ne indicator as in the violion plots above, but coloring the points showing which species would be below or above Ne 500 if considering Ne at the species level.

# add sampling size
sample_size <- indicators_averaged_one %>%
               filter(!is.na(Ne_mock_species)) %>% 
               filter(!is.na(global_IUCN)) %>% 
               filter(Ne_mock_species<1000000) %>% 
               group_by(global_IUCN) %>% summarize(num=n())

# new df 
df<- indicators_averaged_one %>% 
     filter(!is.na(Ne_mock_species)) %>% 
     filter(!is.na(global_IUCN)) %>% 
     filter(Ne_mock_species<1000000) %>% 
        # add sampling size 
        left_join(sample_size) %>%
        mutate(myaxis = paste0(global_IUCN, " (n= ", num, ")"))
## Joining, by = "global_IUCN"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis, 
                  #grep is used below to get the sample size, which may change depending on the data
                  levels=c(grep("cr", unique(df$myaxis), value = TRUE),
                          grep("en", unique(df$myaxis), value = TRUE),
                          grep("vu", unique(df$myaxis), value = TRUE),
                          grep("nt", unique(df$myaxis), value = TRUE),
                          grep("lc", unique(df$myaxis), value = TRUE),
                          grep("dd", unique(df$myaxis), value = TRUE),
                          grep("not_assessed", unique(df$myaxis), value = TRUE),
                          grep("unknown", unique(df$myaxis), value = TRUE)))

df$global_IUCN<-factor(df$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))

      
# Plot the Ne indicator as above, but with points colored by Ne_mock above or below 500

p1<- df %>%
    ggplot(aes(x=myaxis, y=indicator1_mean , fill=global_IUCN)) +
      geom_violin(width=1, linewidth = 0)  +
      xlab("") + ylab("Proportion of populations within species with Ne>500") +
      coord_flip() +
      scale_fill_manual(values= IUCNcolors, # iucn color codes
                        breaks=c(levels(df$global_IUCN)), 
                        guide = "none") + # hide legend
      scale_x_discrete(limits=rev) +
   
     # add new scale color for points
     new_scale_color() +
     geom_jitter(size=1, width = 0.1, aes(color = Ne_mock_category)) +
     scale_color_manual(values=c("black", "#F0A6CA")) +
     labs(color= "Species total Ne") +
  
  # theme stuff
      theme_light() +
      theme(panel.border = element_blank(), legend.position = "bottom",
            plot.title = element_text(hjust = 0.5), # center title
            text= element_text(size=15))
## Warning: Removed 26 rows containing non-finite values (`stat_ydensity()`).
p1
## Warning: Removed 26 rows containing non-finite values (`new_stat_ydensity()`).
## Warning: Removed 26 rows containing missing values (`geom_point()`).

Plot bar with numbers

p2<-indicators_averaged_one %>%
  filter(!is.na(Ne_mock_category)) %>% 
  ggplot(aes(x=global_IUCN, fill=Ne_mock_category))+
  geom_bar(position = "dodge") +
  scale_fill_manual(values=c("black", "#F0A6CA")) +
                  labs(fill= "Species total Ne") +
  coord_flip() + xlab("") +
  theme_light() +
  scale_x_discrete(limits=rev) +
  theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p2

Summnary table by Red List category

x  <- indicators_averaged_one %>%
               filter(!is.na(indicator1_mean)) %>% 
               group_by(global_IUCN, Ne_mock_category) %>% 
               summarize(n=n(),
                         mean=mean(indicator1_mean),
                         median=median(indicator1_mean),
                         per.0=sum(indicator1_mean==0) / n *100,
                         per.below.25=sum(indicator1_mean<0.25) / n *100,
                         per.below.90=sum(indicator1_mean<0.90) / n *100,
                         per.above.75=sum(indicator1_mean>0.75)/ n *100,
                         per1=sum(indicator1_mean==1) / n *100)
## `summarise()` has grouped output by 'global_IUCN'. You can override using the
## `.groups` argument.
kable(x, digits = 2)
global_IUCN Ne_mock_category n mean median per.0 per.below.25 per.below.90 per.above.75 per1
cr Above 500 9 0.56 0.67 22.22 33.33 66.67 33.33 33.33
cr Below 500 37 0.00 0.00 100.00 100.00 100.00 0.00 0.00
en Above 500 19 0.63 0.60 15.79 26.32 52.63 47.37 47.37
en Below 500 29 0.00 0.00 100.00 100.00 100.00 0.00 0.00
vu Above 500 30 0.70 0.77 3.33 10.00 53.33 50.00 46.67
vu Below 500 35 0.00 0.00 100.00 100.00 100.00 0.00 0.00
vu NA 1 0.00 0.00 100.00 100.00 100.00 0.00 0.00
nt Above 500 27 0.45 0.33 14.81 48.15 70.37 29.63 29.63
nt Below 500 24 0.00 0.00 100.00 100.00 100.00 0.00 0.00
lc Above 500 114 0.60 0.61 14.04 28.07 54.39 47.37 45.61
lc Below 500 72 0.00 0.00 100.00 100.00 100.00 0.00 0.00
dd Above 500 7 0.63 1.00 14.29 28.57 42.86 57.14 57.14
dd Below 500 3 0.00 0.00 100.00 100.00 100.00 0.00 0.00
not_assessed Above 500 78 0.37 0.25 26.92 48.72 82.05 17.95 17.95
not_assessed Below 500 78 0.00 0.00 100.00 100.00 100.00 0.00 0.00
unknown Above 500 2 1.00 1.00 0.00 0.00 0.00 100.00 100.00
unknown Below 500 1 0.00 0.00 100.00 100.00 100.00 0.00 0.00
NA Below 500 2 0.00 0.00 100.00 100.00 100.00 0.00 0.00

Summary table by Ne_mock_category

x  <- indicators_averaged_one %>%
               filter(!is.na(indicator1_mean)) %>% 
               group_by(Ne_mock_category) %>% 
               summarize(n=n(),
                         mean=mean(indicator1_mean),
                         median=median(indicator1_mean),
                         per.0=sum(indicator1_mean==0) / n *100,
                         per.below.25=sum(indicator1_mean<0.25) / n *100,
                         per.below.90=sum(indicator1_mean<0.90) / n *100,
                         per.above.75=sum(indicator1_mean>0.75)/ n *100,
                         per1=sum(indicator1_mean==1) / n *100)

kable(x, digits = 2)
Ne_mock_category n mean median per.0 per.below.25 per.below.90 per.above.75 per1
Above 500 286 0.54 0.5 16.78 33.57 62.94 38.11 37.06
Below 500 281 0.00 0.0 100.00 100.00 100.00 0.00 0.00
NA 1 0.00 0.0 100.00 100.00 100.00 0.00 0.00

How many Above 500 have an Ne value below 1?

Supplementary figure: 2 panels IUCN violin plots + mock Ne, and bar plot

plot_grid(p1 + theme(legend.position = ""), 
          p2 ,
          ncol = 1, labels=c("a)", "b)"))
## Warning: Removed 26 rows containing non-finite values (`new_stat_ydensity()`).
## Warning: Removed 26 rows containing missing values (`geom_point()`).

Indicator values by taxonomic group

All the following plots and analyses consider the average of multiassessed species (variable _mean), so that they are shown only once.

We also grouped taxa with small n (<5) into “others”, according to the following table:

table(indicators_averaged_one$taxonomic_group)
## 
##     amphibian          bird          fish  invertebrate        mammal 
##            56           167            62           135           135 
##       reptile    angiosperm     bryophyte    gymnosperm pteridophytes 
##            70           235             5            19            14 
##        fungus         other 
##             3            18

They are grouped along with “other” in a new category “others” in the new variable taxonomic_group_simplified:

indicators_averaged_one <- indicators_averaged_one %>% 
                           ungroup() %>% 
  mutate(taxonomic_group_simplified = case_when(
                                       # if the taxon group is in the list of groups with small n change to "others"
                                       as.character(taxonomic_group) %!in% c("bryophyte", "fungus", "other") ~  as.character(taxonomic_group),
                                       TRUE ~ "others"))

# check:
table(indicators_averaged_one$taxonomic_group_simplified)
## 
##     amphibian    angiosperm          bird          fish    gymnosperm 
##            56           235           167            62            19 
##  invertebrate        mammal        others pteridophytes       reptile 
##           135           135            26            14            70

We also create a group of only 3 categories for animals, plants and others:

# Define the grouping map
grouping_map <- c(
  "amphibian", "bird", "fish", "invertebrate", "mammal",
  "angiosperm", "gymnosperm", "reptile", "pteridophytes", "others"
)

# Create a new variable taxonomic_group_3
indicators_averaged_one <- indicators_averaged_one %>%
                            mutate(
                              taxonomic_group_3 = case_when(
                                taxonomic_group_simplified %in% grouping_map[1:5] ~ "animals",
                                taxonomic_group_simplified %in% grouping_map[6:9] ~ "plants",
                                taxonomic_group_simplified %in% grouping_map[10] ~ "others",
                                TRUE ~ NA_character_
                              )
                            )

# reorder levels
indicators_averaged_one$taxonomic_group_3<- factor(indicators_averaged_one$taxonomic_group_3, 
                                                  levels=c("animals", "plants", "others"))

Violin plots, histograms and summary tables for each indicator by taxonomic group

Indicator Ne > 500

## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
               filter(!is.na(indicator1_mean)) %>% 
               group_by(taxonomic_group_simplified) %>% summarize(num=n())

# new df 
df<- indicators_averaged_one %>% 
     filter(!is.na(indicator1_mean)) %>% 
        # add sampling size 
        left_join(sample_size) %>%
        mutate(myaxis = paste0(taxonomic_group_simplified, " (n= ", num, ")"))
## Joining, by = "taxonomic_group_simplified"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis, 
                  #grep is used below to get the sample size, which may change depending on the data
                    levels=c(grep("amphibian", unique(df$myaxis), value = TRUE), 
                             grep("bird" , unique(df$myaxis), value = TRUE),
                             grep("fish" , unique(df$myaxis), value = TRUE),
                             grep("invertebrate", unique(df$myaxis), value = TRUE),
                             grep("mammal", unique(df$myaxis), value = TRUE),
                             grep("reptile", unique(df$myaxis), value = TRUE),
                             grep("angiosperm", unique(df$myaxis), value = TRUE),
                             grep("gymnosperm", unique(df$myaxis), value = TRUE),
                             grep("pteridophytes", unique(df$myaxis), value = TRUE),
                             grep("others" , unique(df$myaxis), value = TRUE)))

df$taxonomic_group_simplified<-factor(df$taxonomic_group_simplified, 
                                       levels=c("amphibian", "bird" , "fish" , "invertebrate", "mammal", "reptile",
                                                 "angiosperm",  "gymnosperm", "pteridophytes",
                                                 "others"))

        
# plot
p1<-df %>%
    ggplot(aes(x=myaxis, y=indicator1_mean, fill=taxonomic_group_simplified, color=taxonomic_group_simplified)) +
      geom_violin(width=1.5, linewidth = 0.2)  +
      geom_jitter(size=.7, width = 0.1, color="black") +
      xlab("") + ylab("Proportion of populations within species with Ne>500") +
      coord_flip() +
      scale_x_discrete(limits=rev) +
      scale_fill_manual(values= c(rep(grouped_taxon_colors[1], 6), # for animals
                                  rep(grouped_taxon_colors[2], 3), # for platns
                                  rep(grouped_taxon_colors[3], 1)), # for fungi and others
                       breaks=c(levels(df$taxonomic_group_simplified))) +
      scale_color_manual(values= c(rep(grouped_taxon_colors[1], 6), # for animals
                                  rep(grouped_taxon_colors[2], 3), # for platns
                                  rep(grouped_taxon_colors[3], 1)), # for fungi and others
                       breaks=c(levels(df$taxonomic_group_simplified))) +
      theme_light() +
      theme(panel.border = element_blank(), legend.position="none",
            text= element_text(size=15))
p1
## Warning: `position_dodge()` requires non-overlapping x intervals

Table with sampling size, mean indicator value and proporiton of taxa where the value is below 0.25, 0.50 and 0.75:

#summary table by taxonomic group
x  <- indicators_averaged_one %>%
               filter(!is.na(indicator1_mean)) %>% 
               filter(!is.na(taxonomic_group_simplified)) %>% 
               group_by(taxonomic_group_simplified) %>% 
               summarize(n=n(),
                         mean=mean(indicator1_mean),
                         median=median(indicator1_mean),
                         n.below.75=sum(indicator1_mean<0.75),
                         n.below.50=sum(indicator1_mean<0.50),
                         n.below.25=sum(indicator1_mean<0.25),
                         per.below.25=n.below.25/n*100,
                         per.below.50=n.below.50/n*100)

# Calculate total counts and means
total_counts <- summarise(x,
                          taxonomic_group_simplified = "ALL",
                          n = sum(n),
                          mean= mean(mean),
                          median=median(median),
                          n.below.75 = sum(n.below.75),
                          n.below.50 = sum(n.below.50),
                          n.below.25 = sum(n.below.25),
                          per.below.25 = n.below.25 / n * 100,
                          per.below.50 = n.below.50 / n * 100)

# Bind the total row to the summary_table
summary_table <- bind_rows(x, total_counts)

# keep taxonomic groups as level in desired order:
summary_table$taxonomic_group_simplified<-factor(summary_table$taxonomic_group_simplified,
                                                 levels = c("amphibian", "bird" , "fish" , "invertebrate", "mammal",
                                                 "angiosperm", "gymnosperm",  "reptile", "pteridophytes",
                                                "others", "ALL"))
summary_table<- summary_table %>% arrange(taxonomic_group_simplified)

# show nice table
kable(summary_table, digits=2)
taxonomic_group_simplified n mean median n.below.75 n.below.50 n.below.25 per.below.25 per.below.50
amphibian 26 0.16 0.00 25 21 19 73.08 80.77
bird 91 0.32 0.00 66 60 58 63.74 65.93
fish 34 0.39 0.20 25 20 18 52.94 58.82
invertebrate 65 0.28 0.00 51 45 44 67.69 69.23
mammal 96 0.42 0.08 62 54 50 52.08 56.25
angiosperm 188 0.18 0.00 170 154 140 74.47 81.91
gymnosperm 15 0.16 0.00 13 13 12 80.00 86.67
reptile 32 0.29 0.00 24 23 21 65.62 71.88
pteridophytes 11 0.18 0.00 11 8 8 72.73 72.73
others 10 0.15 0.00 9 8 8 80.00 80.00
ALL 568 0.25 0.00 456 406 378 66.55 71.48

Indicator Proportion of maintained populations:

## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
               filter(!is.na(indicator2_mean)) %>% 
               group_by(taxonomic_group_simplified) %>% summarize(num=n())

# new df 
df<- indicators_averaged_one %>% 
     filter(!is.na(indicator2_mean)) %>% 
        # add sampling size 
        left_join(sample_size) %>%
        mutate(myaxis = paste0(taxonomic_group_simplified, " (n= ", num, ")"))
## Joining, by = "taxonomic_group_simplified"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis, 
                  #grep is used below to get the sample size, which may change depending on the data
                    levels=c(grep("amphibian", unique(df$myaxis), value = TRUE), 
                             grep("bird" , unique(df$myaxis), value = TRUE),
                             grep("fish" , unique(df$myaxis), value = TRUE),
                             grep("invertebrate", unique(df$myaxis), value = TRUE),
                             grep("mammal", unique(df$myaxis), value = TRUE),
                             grep("reptile", unique(df$myaxis), value = TRUE),
                             grep("angiosperm", unique(df$myaxis), value = TRUE),
                             grep("gymnosperm", unique(df$myaxis), value = TRUE),
                             grep("pteridophytes", unique(df$myaxis), value = TRUE),
                             grep("others" , unique(df$myaxis), value = TRUE)))

df$taxonomic_group_simplified<-factor(df$taxonomic_group_simplified, 
                           levels=c("amphibian", "bird" , "fish" , "invertebrate", "mammal",  "reptile",
                                     "angiosperm",  "gymnosperm", "pteridophytes",
                                     "others"))

        
# plot
p2<-df %>%
    ggplot(aes(x=myaxis, y=indicator2_mean, fill=taxonomic_group_simplified, color=taxonomic_group_simplified)) +
      geom_violin(width=1, linewidth = 0.2)  +
      geom_jitter(size=.7, width = 0.1, color="black") +
      xlab("") + ylab("Proportion of maintained populations within species") +
      coord_flip() +
      scale_x_discrete(limits=rev) +
      scale_fill_manual(values= c(rep(grouped_taxon_colors[1], 6), # for animals
                                  rep(grouped_taxon_colors[2], 3), # for platns
                                  rep(grouped_taxon_colors[3], 1)), # for fungi and others
                       breaks=c(levels(df$taxonomic_group_simplified))) +
      scale_color_manual(values= c(rep(grouped_taxon_colors[1], 6), # for animals
                                  rep(grouped_taxon_colors[2], 3), # for platns
                                  rep(grouped_taxon_colors[3], 1)), # for fungi and others
                       breaks=c(levels(df$taxonomic_group_simplified))) +
      theme_light() +
      theme(panel.border = element_blank(), legend.position="none",
            text= element_text(size=15))
p2

Table with sampling size, mean indicator value and proporiton of taxa where the value is below 0.25, 0.50 and 0.75:

# summary table for taxonomic group:
x  <- indicators_averaged_one %>%
               filter(!is.na(indicator2_mean)) %>% 
               filter(!is.na(taxonomic_group_simplified)) %>% 
               group_by(taxonomic_group_simplified) %>% 
               summarize(n=n(),
                         mean=mean(indicator2_mean),
                         median=median(indicator2_mean),
                         n.below.75=sum(indicator2_mean<0.75),
                         n.below.50=sum(indicator2_mean<0.50),
                         n.below.25=sum(indicator2_mean<0.25),
                         per.below.25=n.below.25/n*100,
                         per.below.50=n.below.50/n*100)


# Calculate total counts and means
total_counts <- summarise(x,
                          taxonomic_group_simplified = "ALL",
                          n = sum(n),
                          mean = mean(mean),
                          median = median(median),
                          n.below.75 = sum(n.below.75),
                          n.below.50 = sum(n.below.50),
                          n.below.25 = sum(n.below.25),
                          per.below.25 = n.below.25 / n * 100,
                          per.below.50 = n.below.50 / n * 100)

# Bind the total row to the summary_table
summary_table <- bind_rows(x, total_counts)

# keep taxonomic groups as level in desired order:
summary_table$taxonomic_group_simplified<-factor(summary_table$taxonomic_group_simplified,
                                                 levels = c("amphibian", "bird" , "fish" , "invertebrate", "mammal",
                                                 "angiosperm", "gymnosperm",  "reptile", "pteridophytes",
                                                "others", "ALL"))
summary_table<- summary_table %>% arrange(taxonomic_group_simplified)

# show nice table
kable(summary_table, digits=2)
taxonomic_group_simplified n mean median n.below.75 n.below.50 n.below.25 per.below.25 per.below.50
amphibian 43 0.85 1.00 9 4 1 2.33 9.30
bird 68 0.79 1.00 25 9 2 2.94 13.24
fish 40 0.78 0.86 17 3 1 2.50 7.50
invertebrate 77 0.67 0.67 40 21 7 9.09 27.27
mammal 80 0.94 1.00 8 3 0 0.00 3.75
angiosperm 139 0.83 1.00 36 13 4 2.88 9.35
gymnosperm 9 0.97 1.00 0 0 0 0.00 0.00
reptile 35 0.90 1.00 5 2 0 0.00 5.71
pteridophytes 8 0.82 1.00 3 1 0 0.00 12.50
others 19 0.82 0.88 6 1 0 0.00 5.26
ALL 518 0.84 1.00 149 57 15 2.90 11.00

Histograms and summary tables by 3 taxonomic groups (animals, plants, others)

By animals, plants, others:

# Create a histogram 
hist_p1 <- indicators_averaged_one %>%
                  ggplot(aes(x = indicator1_mean, fill = taxonomic_group_3)) +
                  geom_histogram( bins = 25, color="white") + # Adjust the number of bins as needed
                  labs(x = "Proportion of populations within species with Ne>500", y = "Frequency") +
                  scale_fill_manual(
                    values = grouped_taxon_colors, # Custom colors for animals, plants, and others
                    breaks = c("animals", "plants", "others"),
                    name = "Taxonomic Group")+
                  theme_light() +
                  theme(panel.border = element_blank(), text = element_text(size = 15), 
                        legend.position = "right") +
                  guides(fill = guide_legend(title = NULL))

# plot
hist_p1
## Warning: Removed 351 rows containing non-finite values (`stat_bin()`).

Summary table for Ne indicator 3 taxonomic groups:

x  <- indicators_averaged_one %>%
               filter(!is.na(indicator1_mean)) %>% 
               filter(!is.na(taxonomic_group_3)) %>% 
               group_by(taxonomic_group_3) %>% 
               summarize(n=n(),
                         mean=mean(indicator1_mean),
                         median=median(indicator1_mean),
                         per.0=sum(indicator1_mean==0) / n *100,
                         per.below.25=sum(indicator1_mean<0.25) / n *100,
                         per.below.90=sum(indicator1_mean<0.90) / n *100,
                         per.above.75=sum(indicator1_mean>0.75)/ n *100,
                         per1=sum(indicator1_mean==1) / n *100)



# Calculate total counts and means
total_counts <- indicators_averaged_one %>%
               filter(!is.na(indicator1_mean)) %>% 
               filter(!is.na(taxonomic_group_3)) %>% 
               ungroup() %>% 
               summarize(taxonomic_group_3 = "ALL",
                          n= n(),
                          mean = mean(indicator1_mean),
                          median = median(indicator1_mean),
                          per.0=sum(indicator1_mean==0) / n *100,
                          per.below.25=sum(indicator1_mean<0.25) / n *100,
                          per.below.90=sum(indicator1_mean<0.90) / n *100,
                          per.above.75=sum(indicator1_mean>0.75)/ n *100,
                          per1=sum(indicator1_mean==1) / n *100)

# Bind the total row to the summary_table
summary_table <- bind_rows(x, total_counts)

# keep taxonomic groups as level in desired order:
summary_table$taxonomic_group_3<-factor(summary_table$taxonomic_group_3,
                                                 levels = c("animals", "plants", "others", "ALL"))
summary_table<- summary_table %>% arrange(taxonomic_group_3)

kable(summary_table, digits=2)
taxonomic_group_3 n mean median per.0 per.below.25 per.below.90 per.above.75 per1
animals 312 0.34 0 54.49 60.58 74.04 26.28 25.96
plants 246 0.19 0 61.79 73.58 90.24 10.57 9.76
others 10 0.15 0 80.00 80.00 90.00 10.00 10.00
ALL 568 0.27 0 58.10 66.55 81.34 19.19 18.66

PM Histogram for animal, plants, others:

# Create a histogram 
hist_p2 <- indicators_averaged_one %>%
                  ggplot(aes(x = indicator2_mean, fill = taxonomic_group_3)) +
                  geom_histogram(bins = 25, color="white") + # Adjust the number of bins as needed
                  labs(x = "Proportion of maintained populations within species", y = "Frequency") +
                  scale_fill_manual(
                    values = grouped_taxon_colors, # Custom colors for animals, plants, and others
                    breaks = c("animals", "plants", "others"),
                    name = "Taxonomic Group")+
                  theme_light() +
                  theme(panel.border = element_blank(), text = element_text(size = 15)) +
                  guides(fill = guide_legend(title = NULL))

# plot
hist_p2
## Warning: Removed 401 rows containing non-finite values (`stat_bin()`).

Summary table for PM indicator 3 taxonomic groups

x  <- indicators_averaged_one %>%
               filter(!is.na(indicator2_mean)) %>% 
               filter(!is.na(taxonomic_group_3)) %>% 
               group_by(taxonomic_group_3) %>% 
               summarize(n=n(),
                         mean=mean(indicator2_mean),
                         median=median(indicator2_mean),
                         per0=sum(indicator2_mean==0) / n *100,
                         per.below.25=sum(indicator2_mean<0.25) / n *100,
                         per.below.90=sum(indicator2_mean<0.90) / n *100,
                         per.above.75=sum(indicator2_mean>0.75) / n *100,
                         per1=sum(indicator2_mean==1) / n *100)

# Calculate total counts and means
total_counts <- indicators_averaged_one %>%
               filter(!is.na(indicator2_mean)) %>% 
               filter(!is.na(taxonomic_group_3)) %>% 
               ungroup() %>% 
               summarize(taxonomic_group_3 = "ALL",
                          n= n(),
                          mean = mean(indicator2_mean),
                          median = median(indicator2_mean),
                          per0=sum(indicator2_mean==0) / n *100,
                          per.below.25=sum(indicator2_mean<0.25) / n *100,
                          per.below.90=sum(indicator2_mean<0.90) / n *100,
                          per.above.75=sum(indicator2_mean>0.75) / n *100,
                          per1=sum(indicator2_mean==1) / n *100)

# Bind the total row to the summary_table
summary_table <- bind_rows(x, total_counts)

# keep taxonomic groups as level in desired order:
summary_table$taxonomic_group_3<-factor(summary_table$taxonomic_group_3,
                                                 levels = c("animals", "plants", "others", "ALL"))
summary_table<- summary_table %>% arrange(taxonomic_group_3)

kable(summary_table, digits=2)
taxonomic_group_3 n mean median per0 per.below.25 per.below.90 per.above.75 per1
animals 308 0.81 1.00 0.65 3.57 42.21 65.91 53.57
plants 191 0.85 1.00 0.52 2.09 37.17 74.35 56.02
others 19 0.82 0.88 0.00 0.00 52.63 63.16 26.32
ALL 518 0.82 1.00 0.58 2.90 40.73 68.92 53.47

Main Figure: Single figure 4 panels for violin plots and histograms for both indicators by taxonomic group

plot_grid(p2, hist_p2, 
          p1, hist_p1,
          ncol=2, align = "v", labels=c("a)", "b)", "c)", "d)"))
## Warning: Removed 401 rows containing non-finite values (`stat_bin()`).
## Warning: `position_dodge()` requires non-overlapping x intervals
## Warning: Removed 351 rows containing non-finite values (`stat_bin()`).
## Warning: Graphs cannot be vertically aligned unless the axis parameter is set.
## Placing graphs unaligned.

Values of indicator 1 and indicator 2 for multiassessed species

#subset only with taxa assessed multiple times:
only_multi<-indicators_full %>% 
                          filter(multiassessment=="multiassessment") 

First, check how indicator 1 changes across the multiassessments.

p1<-only_multi %>% 
  # Keep rows with different values in indicator1 within each taxon group
  group_by(taxon) %>%
  filter(n_distinct(indicator1) > 1) %>%
  # plot
  ggplot(aes(x=taxon, y=indicator1)) +
          geom_line(colour="darkgrey") + 
          geom_point(aes(color=country_assessment)) +
  xlab("") + ylab("Proportion of populations within species with Ne>500") +
  labs(color="country") +
  ylim(0, 1)+
  coord_flip() +
  theme_light() + 
  theme(panel.border = element_blank(), legend.position="right", text= element_text(size=13))
p1
## Warning: Removed 5 rows containing missing values (`geom_line()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).

Now check how Proportion of maintained populations (indicator 2) changes across the multiassessments.

p2<-only_multi %>% 
  # Keep rows with different values in indicator1 within each taxon group
  group_by(taxon) %>%
  filter(n_distinct(indicator2) > 1) %>%
  
  ggplot(aes(x=taxon, y=indicator2)) +
          geom_line(colour="darkgrey") + 
          geom_point(aes(color=country_assessment)) +
    scale_color_manual(values= scales::hue_pal()(4)[2:4]) + # last 3 colors to make them the same than the other plot
  xlab("") + ylab("Proportion of maintained populations within species") +
  labs(color="country") +
  coord_flip() +
  theme_light() + 
  theme(panel.border = element_blank(), legend.position="right", text= element_text(size=13))
p2
## Warning: Removed 6 rows containing missing values (`geom_line()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).

Plot together:

plot_grid(p2, p1,  
          rel_heights = c(1.3, 0.9),
          ncol=1, labels=c("a)", "b)")) 
## Warning: Removed 6 rows containing missing values (`geom_line()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).
## Warning: Removed 5 rows containing missing values (`geom_line()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).

Indicator 3 (number of species with genetic diversity monitoring)

Indicator 3 refers to the number (count) of taxa by country in which genetic monitoring is occurring. This is stored in the variable temp_gen_monitoring as a “yes/no” answer for each taxon.

indicator3

Plot by global IUCN redlist status

# desired order of levels
indicators_full$global_IUCN<-factor(as.factor(indicators_full$global_IUCN), levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))


## plot
indicators_full %>%
                 # keep only one record if the taxon was assessed more than once within the country
                 select(country_assessment, taxon, temp_gen_monitoring, global_IUCN) %>%
                 filter(!duplicated(.)) %>%

                 # count "yes" in tem_gen_monitoring by country
                 filter(temp_gen_monitoring=="yes") %>%
ggplot(aes(x=country_assessment, fill=global_IUCN)) +
  geom_bar() +
  xlab("") + ylab("Number of taxa with temporal genetic diversity monitoring") +
  scale_fill_manual(values= IUCNcolors, # iucn color codes
                    breaks=levels(as.factor(indicators_full$global_IUCN))) +
      theme_light()

Relatively few taxa have genetic monitoring, but many have some sort of genetic study. Let’s check that with a Sankey Plot:

# first subset the ind3_data keeping only taxa assessed a single time, plust the first record of those assessed multiple times.
ind3_data_firstmulti<-ind3_data[!duplicated(cbind(ind3_data$taxon, ind3_data$country_assessment)), ]

# transform data to how ggsankey wants it
df <- ind3_data_firstmulti %>%
  make_long(country_assessment, temp_gen_monitoring, gen_studies)

# plot
ggplot(df, aes(x = x,
               next_x = next_x,
               node = node,
               next_node = next_node,
               fill = factor(node),
               label = node)) +
  geom_sankey(flow.alpha = 0.5, 
              show.legend = FALSE) +
  geom_sankey_label(size = 2.5, color = "black", fill = "white") +
  theme_sankey(base_size = 10) +

    # manually set flow fill according to desired color
                            # countries
  scale_fill_manual(values=c(scales::hue_pal()(length(unique(ind3_data_firstmulti$country_assessment))),  
                             # traffic light for monitoring
                             c("darkolivegreen", "brown3", "darkgrey"),
                             # nice soft colors for gen_studies
                             c("grey50", "grey35", "grey50", "brown3")),
                              
                    breaks=c(unique(ind3_data_firstmulti$country_assessment),
                             unique(ind3_data_firstmulti$temp_gen_monitoring),
                             unique(ind3_data_firstmulti$gen_studies))) +
  
  xlab("")
## Warning: Removed 2 rows containing missing values (`geom_label()`).

table(ind3_data_firstmulti$gen_studies)
## 
##        no     phylo phylo_pop       pop 
##       375       190       244        99

Count data:

ind3_data %>%
                 # keep only one record if the taxon was assessed more than once within the country
                 select(country_assessment, taxon, gen_studies, temp_gen_monitoring) %>%
                 filter(!duplicated(.)) %>%

                 group_by(country_assessment, temp_gen_monitoring, gen_studies) %>%
                 summarise(n_studies=n())
## `summarise()` has grouped output by 'country_assessment',
## 'temp_gen_monitoring'. You can override using the `.groups` argument.

How many genetic studies ara available by country for species without temporal genetic diversity monitoring?

## plot
indicators_full %>%
                 # keep only one record if the taxon was assessed more than once within the country
                 select(country_assessment, taxon, temp_gen_monitoring, gen_studies) %>%
                 filter(!duplicated(.)) %>%
                 # keep only taxa without gen div monitoring
                 filter(temp_gen_monitoring=="no")%>%

ggplot(aes(x=country_assessment, fill=gen_studies)) +
  geom_bar() +
    scale_fill_manual(values=c("grey80", scales::hue_pal()(3)))+
  xlab("")  +
      theme_light()

Summary table of mean indicator values and n

The tables below show the indicator values and sampling size averaging them by country, taxonomic group, distribution type or IUCN global red list status. For this summary the mean of the multiassessed species was considering and counted as a single entry for the sampling size.

Codes for indicator names:

Codes for summary stats:

Summary stats by country:

x<-indicators_averaged_one %>% 
                group_by(country_assessment) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))

# nice table
kable(x, digits=3)
country_assessment n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
Australia 28 0.903 0.178 47 0.170 0.299 10
Belgium 27 0.453 0.221 101 0.246 0.381 10
Colombia 22 0.601 0.174 43 0.326 0.474 NA
France 34 0.854 0.278 55 0.416 0.471 7
Japan 50 0.925 0.152 50 0.077 0.180 0
Mexico 28 0.936 0.135 47 0.217 0.354 7
S. Africa 90 0.948 0.155 61 0.422 0.475 5
Sweden 120 0.777 0.271 83 0.188 0.331 20
US 117 0.794 0.244 79 0.354 0.410 6

Taxonomic groups

Summary stats by taxonomic group:

x<-indicators_averaged_one %>% 
                group_by(taxonomic_group) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))

# nice table
kable(x, digits=3)
taxonomic_group n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
amphibian 43 0.833 0.244 26 0.150 0.250 9
bird 67 0.789 0.265 91 0.321 0.445 NA
fish 40 0.768 0.245 34 0.414 0.448 11
invertebrate 77 0.671 0.309 65 0.277 0.403 4
mammal 80 0.937 0.161 95 0.419 0.461 22
reptile 35 0.902 0.176 31 0.288 0.437 1
angiosperm 138 0.834 0.242 188 0.177 0.311 6
bryophyte 4 0.688 0.252 2 0.250 0.354 0
gymnosperm 9 0.975 0.050 15 0.161 0.353 0
pteridophytes 8 0.824 0.251 11 0.179 0.284 0
fungus 3 0.903 0.167 2 0.500 0.707 0
other 12 0.844 0.141 6 0.000 0.000 3

Detailed table:

x<-indicators_averaged_one %>% 
                group_by(country_assessment, taxonomic_group) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# nice table
kable(x, digits=3)
country_assessment taxonomic_group n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
Australia amphibian 0 NaN NA 1 0.000 NA 0
Australia bird 9 1.000 0.000 9 0.167 0.264 2
Australia fish 1 1.000 NA 2 0.500 0.707 1
Australia invertebrate 1 0.500 NA 0 NaN NA 0
Australia mammal 3 0.750 0.250 10 0.303 0.359 3
Australia reptile 7 0.958 0.078 5 0.050 0.112 0
Australia angiosperm 2 0.700 0.424 15 0.115 0.276 1
Australia bryophyte 0 NaN NA 1 0.500 NA 0
Australia gymnosperm 0 NaN NA 2 0.000 0.000 0
Australia pteridophytes 0 NaN NA 1 0.000 NA 0
Australia other 5 0.887 0.141 1 0.000 NA 3
Belgium amphibian 3 0.310 0.170 9 0.186 0.330 1
Belgium fish 5 0.570 0.153 9 0.206 0.352 2
Belgium invertebrate 10 0.444 0.259 30 0.323 0.416 3
Belgium mammal 3 0.444 0.192 19 0.447 0.497 4
Belgium reptile 0 NaN NA 4 0.030 0.026 0
Belgium angiosperm 5 0.446 0.279 26 0.093 0.219 0
Belgium bryophyte 1 0.444 NA 1 0.000 NA 0
Belgium gymnosperm 0 NaN NA 1 0.050 NA 0
Belgium pteridophytes 0 NaN NA 2 0.250 0.354 0
Colombia amphibian 2 0.625 0.177 0 NaN NA 0
Colombia bird 19 0.604 0.181 31 0.419 0.502 NA
Colombia fish 0 NaN NA 2 0.500 0.707 0
Colombia mammal 1 0.500 NA 1 0.000 NA 0
Colombia reptile 0 NaN NA 2 0.000 0.000 0
Colombia angiosperm 0 NaN NA 6 0.000 0.000 0
Colombia other 0 NaN NA 1 0.000 NA 0
France amphibian 1 1.000 NA 1 0.000 NA 1
France bird 11 0.852 0.259 20 0.342 0.460 1
France fish 1 0.167 NA 6 0.589 0.463 2
France invertebrate 3 0.700 0.265 7 0.405 0.508 0
France mammal 11 0.955 0.151 10 0.217 0.416 3
France reptile 1 1.000 NA 2 0.500 0.707 0
France angiosperm 3 0.667 0.577 6 0.583 0.492 0
France gymnosperm 1 1.000 NA 2 1.000 0.000 0
France fungus 1 1.000 NA 1 1.000 NA 0
France other 1 0.900 NA 0 NaN NA 0
Japan angiosperm 39 0.931 0.130 39 0.061 0.148 0
Japan gymnosperm 4 1.000 0.000 4 0.000 0.000 0
Japan pteridophytes 7 0.847 0.262 7 0.210 0.316 0
Mexico amphibian 0 NaN NA 2 0.000 0.000 0
Mexico bird 1 0.667 NA 2 0.500 0.707 1
Mexico fish 0 NaN NA 0 NaN NA 0
Mexico invertebrate 1 1.000 NA 0 NaN NA 0
Mexico mammal 3 0.867 0.231 3 0.000 0.000 1
Mexico reptile 1 1.000 NA 4 0.500 0.577 0
Mexico angiosperm 20 0.959 0.120 29 0.236 0.339 5
Mexico gymnosperm 2 0.886 0.005 6 0.061 0.148 0
Mexico pteridophytes 0 NaN NA 1 0.000 NA 0
S. Africa amphibian 18 0.918 0.173 4 0.125 0.250 2
S. Africa bird 11 1.000 0.000 11 0.327 0.467 1
S. Africa fish 9 1.000 0.000 4 0.297 0.477 0
S. Africa invertebrate 0 NaN NA 0 NaN NA 0
S. Africa mammal 32 0.992 0.044 31 0.608 0.480 2
S. Africa reptile 7 0.869 0.254 1 1.000 NA 0
S. Africa angiosperm 12 0.833 0.277 10 0.060 0.190 0
S. Africa gymnosperm 1 1.000 NA 0 NaN NA 0
Sweden amphibian 13 0.891 0.183 9 0.192 0.219 5
Sweden bird 11 0.696 0.385 9 0.111 0.333 2
Sweden fish 7 0.738 0.290 4 0.299 0.476 4
Sweden invertebrate 29 0.674 0.292 20 0.078 0.225 0
Sweden mammal 20 0.986 0.047 15 0.361 0.447 8
Sweden reptile 7 0.983 0.045 3 0.619 0.541 1
Sweden angiosperm 22 0.622 0.259 18 0.159 0.258 0
Sweden bryophyte 2 0.904 0.048 0 NaN NA 0
Sweden pteridophytes 1 0.667 NA 0 NaN NA 0
Sweden fungus 2 0.855 0.205 1 0.000 NA 0
Sweden other 6 0.800 0.153 4 0.000 0.000 0
US amphibian 6 0.754 0.267 0 NaN NA 0
US bird 5 0.741 0.205 9 0.254 0.375 2
US fish 17 0.737 0.198 7 0.615 0.448 2
US invertebrate 33 0.730 0.324 8 0.492 0.471 1
US mammal 7 0.905 0.194 6 0.303 0.351 1
US reptile 12 0.823 0.202 10 0.271 0.444 0
US angiosperm 35 0.867 0.181 39 0.332 0.398 0
US bryophyte 1 0.500 NA 0 NaN NA 0
US gymnosperm 1 1.000 NA 0 NaN NA 0

IUCN

Summary stats:

x<-indicators_averaged_one %>% 
                group_by(global_IUCN) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))

# nice table
kable(x, digits=3)
global_IUCN n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
cr 36 0.825 0.272 46 0.109 0.284 8
en 59 0.786 0.254 48 0.260 0.415 9
vu 64 0.778 0.253 66 0.310 0.414 4
nt 42 0.821 0.263 50 0.237 0.375 7
lc 152 0.845 0.251 185 0.365 0.436 32
dd 9 0.707 0.313 10 0.442 0.490 2
not_assessed 152 0.833 0.235 156 0.187 0.329 3
unknown 2 1.000 0.000 3 0.667 0.577 0
NA 0 NaN NA 2 0.000 0.000 NA

Detailed table by IUCN category:

x<-indicators_averaged_one %>% 
                group_by(country_assessment, global_IUCN) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# nice table
kable(x, digits=3)
country_assessment global_IUCN n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
Australia cr 5 0.860 0.219 10 0.000 0.000 3
Australia en 4 0.850 0.300 7 0.167 0.264 2
Australia vu 6 0.943 0.101 8 0.260 0.355 1
Australia nt 4 1.000 0.000 5 0.353 0.328 0
Australia lc 3 1.000 0.000 8 0.229 0.367 1
Australia not_assessed 6 0.822 0.202 9 0.128 0.329 3
Australia unknown 0 NaN NA 0 NaN NA 0
Belgium cr 1 0.333 NA 2 0.500 0.707 0
Belgium en 1 0.455 NA 1 0.000 NA 0
Belgium vu 3 0.548 0.410 3 0.333 0.577 0
Belgium nt 2 0.310 0.034 13 0.030 0.058 3
Belgium lc 19 0.466 0.215 64 0.285 0.398 7
Belgium dd 1 0.333 NA 3 0.364 0.553 0
Belgium not_assessed 0 NaN NA 14 0.151 0.292 0
Belgium unknown 0 NaN NA 1 1.000 NA 0
Colombia cr 2 0.450 0.071 7 0.000 0.000 0
Colombia en 4 0.525 0.145 3 0.667 0.577 0
Colombia vu 11 0.659 0.195 15 0.133 0.352 0
Colombia nt 3 0.550 0.180 6 0.667 0.516 0
Colombia lc 2 0.667 0.000 10 0.600 0.516 0
Colombia NA 0 NaN NA 2 0.000 0.000 NA
France cr 2 0.583 0.589 5 0.040 0.089 1
France en 1 1.000 NA 3 0.333 0.577 1
France vu 4 0.725 0.320 9 0.481 0.467 0
France nt 7 0.839 0.277 6 0.333 0.516 0
France lc 17 0.953 0.133 28 0.476 0.482 4
France dd 0 NaN NA 2 1.000 0.000 1
France not_assessed 3 0.633 0.551 2 0.000 0.000 0
Japan cr 2 1.000 0.000 2 0.000 0.000 0
Japan en 1 1.000 NA 1 0.000 NA 0
Japan lc 3 1.000 0.000 3 0.021 0.036 0
Japan not_assessed 44 0.914 0.159 44 0.086 0.190 0
Mexico cr 4 1.000 0.000 3 0.333 0.577 1
Mexico en 9 0.919 0.163 12 0.083 0.289 3
Mexico vu 5 0.900 0.224 5 0.000 0.000 1
Mexico nt 1 0.889 NA 2 0.000 0.000 0
Mexico lc 5 0.936 0.092 12 0.497 0.367 2
Mexico dd 1 1.000 NA 1 0.333 NA 0
Mexico not_assessed 3 0.958 0.072 12 0.158 0.318 0
S. Africa cr 14 0.860 0.285 12 0.042 0.144 2
S. Africa en 16 0.895 0.182 9 0.467 0.469 1
S. Africa vu 14 0.982 0.067 12 0.500 0.522 1
S. Africa nt 8 0.969 0.088 8 0.253 0.356 0
S. Africa lc 34 1.000 0.000 18 0.667 0.485 1
S. Africa dd 1 1.000 NA 0 NaN NA 0
S. Africa not_assessed 2 0.750 0.354 1 0.000 NA 0
S. Africa unknown 1 1.000 NA 1 1.000 NA 0
Sweden en 5 0.489 0.208 2 0.050 0.071 0
Sweden vu 7 0.685 0.247 7 0.297 0.363 1
Sweden nt 8 0.816 0.273 5 0.054 0.074 1
Sweden lc 63 0.836 0.259 41 0.247 0.374 17
Sweden dd 4 0.549 0.299 4 0.250 0.500 1
Sweden not_assessed 33 0.744 0.268 24 0.085 0.228 0
US cr 6 0.828 0.164 5 0.467 0.447 1
US en 18 0.743 0.268 10 0.300 0.483 2
US vu 14 0.664 0.271 7 0.427 0.311 0
US nt 9 0.796 0.289 5 0.284 0.435 3
US lc 6 0.791 0.208 1 0.000 NA 0
US dd 2 0.917 0.118 0 NaN NA 0
US not_assessed 61 0.829 0.234 50 0.365 0.415 0
US unknown 1 1.000 NA 1 0.000 NA 0

Distribution type

Summary stats:

x<-indicators_averaged_one %>% 
                group_by(species_range) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))

# nice table
kable(x, digits=3)
species_range n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
restricted 309 0.795 0.266 313 0.187 0.344 24
unknown 14 0.760 0.259 20 0.300 0.470 1
wide ranging 193 0.867 0.217 231 0.384 0.432 40
NA 0 NaN NA 2 0.000 0.000 NA

Detailed table by IUCN category:

x<-indicators_averaged_one %>% 
                group_by(country_assessment, species_range) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# nice table
kable(x, digits=3)
country_assessment species_range n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
Australia restricted 14 0.865 0.224 27 0.114 0.253 4
Australia unknown 0 NaN NA 1 0.000 NA 0
Australia wide ranging 14 0.942 0.110 19 0.260 0.347 6
Belgium restricted 10 0.319 0.128 22 0.135 0.262 1
Belgium unknown 2 0.456 0.062 5 0.000 0.000 1
Belgium wide ranging 15 0.542 0.242 74 0.295 0.411 8
Colombia restricted 16 0.614 0.193 28 0.286 0.460 0
Colombia unknown 5 0.547 0.117 10 0.500 0.527 0
Colombia wide ranging 1 0.667 NA 3 0.333 0.577 0
Colombia NA 0 NaN NA 2 0.000 0.000 NA
France restricted 14 0.741 0.336 28 0.227 0.388 2
France wide ranging 20 0.933 0.202 27 0.611 0.476 5
Japan restricted 35 0.939 0.141 35 0.080 0.180 0
Japan unknown 1 1.000 NA 1 0.000 NA 0
Japan wide ranging 14 0.884 0.179 14 0.076 0.192 0
Mexico restricted 19 0.933 0.138 31 0.094 0.267 4
Mexico unknown 2 1.000 0.000 0 NaN NA 0
Mexico wide ranging 7 0.926 0.150 16 0.456 0.385 3
S. Africa restricted 41 0.905 0.206 29 0.217 0.391 4
S. Africa unknown 2 1.000 0.000 1 1.000 NA 0
S. Africa wide ranging 47 0.984 0.081 31 0.595 0.475 1
Sweden restricted 71 0.708 0.292 53 0.076 0.210 6
Sweden unknown 2 1.000 0.000 2 0.000 0.000 0
Sweden wide ranging 47 0.871 0.204 28 0.415 0.408 14
US restricted 89 0.813 0.243 60 0.367 0.418 3
US unknown 0 NaN NA 0 NaN NA 0
US wide ranging 28 0.735 0.244 19 0.314 0.393 3

Simplified figures and basic stats for text summary and policy brief

How many species and pops:

How many species:

nrow(indicators_averaged_one)
## [1] 919

How many assessments (including species assessed more than once):

nrow(indicators_full)
## [1] 982

How many populations, including all pops from species that were assessed more than once:

nrow(ind1_data)
## [1] 5652

How many populations, counting only once populations from taxa assessed more than once:

# This looks for the id of the taxa already keeping only 1 for the multiassessed taxa, and keeps those int he ind1_data (where the pops data is)
x<-ind1_data[ind1_data$X_uuid %in% indicators_averaged_one$X_uuid, ]

# the number of rows is the number of pops counting only once multiassessed taxa
nrow(x)
## [1] 5271

How many multiassesments:

sum(indicators_full$multiassessment=="multiassessment")
## [1] 107

Which taxa had multiassesments and how many:

x<- indicators_full %>% filter(multiassessment=="multiassessment") %>%
                    group_by(taxon) %>%
                    summarise(n=n())
kable(x)
taxon n
Alasmidonta varicosa 2
Alouatta palliata mexicana 2
Ambystoma cingulatum 4
Anguis fragilis 2
Aphelocoma coerulescens 5
Astragalus microcymbus 2
Barbastella barbastellus 2
Bombus terricola 2
Cambarus elkensis 2
Coronella austriaca 2
Cryptobranchus alleganiensis alleganiensis 2
Cryptomastix devia 2
Erimystax harryi 2
Etheostoma chienense 2
Etheostoma osburni 2
Hemphillia burringtoni 2
Heterelmis stephani 2
Hydroprogne caspia 2
Lavinia exilicauda chi 2
Lepidium papilliferum 2
Mustela nigripes 2
Necturus lewisi 2
Nicrophorus americanus 2
Notophthalmus perstriatus 16
Notropis mekistocholas 2
Notropis topeka 2
Noturus munitus 2
Obovaria subrotunda 2
Oncorhynchus apache 2
Oncorhynchus clarkii virginalis 2
Phonotimpus talquian 2
Pimelea spinescens subspecies spinescens 2
Plestiodon egregius egregius 2
Pleurobema rubrum 2
Procambarus orcinus 2
Pseudemys rubriventris 2
Rana dalmatina 2
Rhynchospora crinipes 2
Streptanthus bracteatus 2
Texella reyesi 2
Thamnophis sirtalis tetrataenia 2
Thoburnia atripinnis 2
Toxolasma lividum 2
Zapus hudsonius luteus 2

How many taxa with multiassesments?

nrow(x)
## [1] 44

Plain Histogram and stats for Ne > 500 indicator

Plain histogram:

# Create a histogram 
hist_p <- indicators_averaged_one %>%
                  ggplot(aes(x = indicator1_mean)) +
                  geom_histogram( bins = 25, fill="grey30") + # Adjust the number of bins as needed
                  labs(x = "Proportion of populations within species with Ne>500", y = "Frequency") +
                  theme_light() +
                  theme(panel.border = element_blank(), text = element_text(size = 15)) +
                  guides(fill = guide_legend(title = NULL))

# plot
hist_p
## Warning: Removed 351 rows containing non-finite values (`stat_bin()`).

Summary stats for the Ne 500 indicator:

x  <- indicators_averaged_one %>%
               filter(!is.na(indicator1_mean)) %>% 
               ungroup() %>% 
               summarize(n=n(),
                         mean=mean(indicator1_mean),
                         median=median(indicator1_mean),
                         per.0=sum(indicator1_mean==0) / n *100,
                         per.below.25=sum(indicator1_mean<0.25) / n *100,
                         per.below.90=sum(indicator1_mean<0.90) / n *100,
                         per.above.75=sum(indicator1_mean>0.75)/ n *100,
                         per1=sum(indicator1_mean==1) / n *100)
x
kable(x, digits = 2)
n mean median per.0 per.below.25 per.below.90 per.above.75 per1
568 0.27 0 58.1 66.55 81.34 19.19 18.66

Data availability for the Ne indicator. At the species level:

sum(!is.na(indicators_averaged_one$indicator1_mean)) / nrow(indicators_averaged_one)
## [1] 0.6180631

At the population level:

sum(!is.na(ind1_data$Ne_combined)) / nrow(ind1_data)
## [1] 0.811925

Populations below the Ne 500 threshold

x<- ind1_data %>% 
  ungroup() %>% 
  summarise(n_pops = n(), 
            n_pops_Ne_data = sum(!is.na(Ne_combined)), 
            n_pops_more_500 = sum(Ne_combined >= 500, na.rm = TRUE),
            n_pops_less_500 =sum(Ne_combined < 500, na.rm = TRUE),
            per_less_500 = n_pops_less_500/n_pops_Ne_data)
kable(x, digits=2)
n_pops n_pops_Ne_data n_pops_more_500 n_pops_less_500 per_less_500
5652 4589 604 3985 0.87

Plain Histogram and stats for Proportion Mantained populations

Plain histogram

# Create a histogram 
hist_p <- indicators_averaged_one %>%
                  ggplot(aes(x = indicator2_mean)) +
                  geom_histogram(bins = 25, fill="grey30") + # Adjust the number of bins as needed
                  labs(x = "Proportion of maintained populations within species", y = "Frequency") +
                  theme_light() +
                  theme(panel.border = element_blank(), text = element_text(size = 15)) +
                  guides(fill = guide_legend(title = NULL))

# plot
hist_p
## Warning: Removed 401 rows containing non-finite values (`stat_bin()`).

Summary stats for the PM indicator:

x  <- indicators_averaged_one %>%
               filter(!is.na(indicator2_mean)) %>% 
               ungroup() %>% 
               summarize(n=n(),
                         mean=mean(indicator2_mean),
                         median=median(indicator2_mean),
                         per0=sum(indicator2_mean==0) / n *100,
                         per.below.25=sum(indicator2_mean<0.25) / n *100,
                         per.below.90=sum(indicator2_mean<0.90) / n *100,
                         per.above.75=sum(indicator2_mean>0.75) / n *100,
                         per1=sum(indicator2_mean==1) / n *100)

kable(x, digits = 2)
n mean median per0 per.below.25 per.below.90 per.above.75 per1
518 0.82 1 0.58 2.9 40.73 68.92 53.47

Data availability, donuts and plot bars for Ne 500

Species level yes/no table with percentages for Ne 500 indicator

df<- indicators_full %>%
     group_by(popsize_data) %>%
   summarise(n=n(),
             percentage = (n / nrow(metadata)) * 100)
   
kable(df, digits = 0)
popsize_data n percentage
data_for_species 131 14
insuff_data_species 230 24
yes 614 64
NA 7 1

Donut only available data

df<- indicators_full %>%
     filter(popsize_data != "data_for_species") %>% # we want to show only data for pops or insufficient
     group_by(popsize_data) %>%
   summarise(n=n(),
             percentage = (n / nrow(metadata)) * 100)

# variable to make change the size of the hole
hsize <- 2 # to change the size of the hole. larger=bigger 
df <- df %>% 
  mutate(x = hsize)  

# donut plot
p <- ggplot(df, aes(x = hsize, y = n, fill = popsize_data)) +
  geom_col() +
  coord_polar(theta = "y") +
  scale_fill_manual(values=c("#2ca02c", "grey80"),
                    breaks=c("yes", "insuff_data_species"),
                    labels=c("Population level", "Insufficient data")) +

  xlim(c(0.2, hsize + 0.5)) + theme_void()
p

Species level yes/no. Bar plot for Ne 500

indicators_full %>%
     filter(popsize_data != "data_for_species") %>% # we want to show only data for pops or insufficient
      ggplot(aes(x=country_assessment, fill = popsize_data)) +
      geom_bar(position = "fill", color="white") +
      scale_fill_manual(values=c("#2ca02c", "grey80"),
                        breaks=c("yes", "insuff_data_species"),
                        labels=c("Population level", "Insufficient data")) +
      scale_x_discrete(limits=rev) + xlab("") + ylab("Data availability (% of species)") +
      coord_flip() +
      theme_light()

Population level, what kind? Table

# we first need the column numbers
df<-ind1_data %>%
   mutate(Ne_calculated_from = replace_na(Ne_calculated_from, "no data available")) %>%
   group_by(Ne_calculated_from) %>%
   summarise(n=n(),
             percentage = (n / nrow(ind1_data)) * 100)
   
kable(df, digits = 0)
Ne_calculated_from n percentage
genetic data 349 6
NcPoint ratio 1266 22
NcRange ratio 2974 53
no data available 1063 19

Donut

# variable to make change the size of the hole
hsize <- 3 # to change the size of the hole. larger=bigger 
df <- df %>% 
  mutate(x = hsize)  

# donut plot
p <- ggplot(df, aes(x = hsize, y = n, fill = Ne_calculated_from)) +
  geom_col() +
  coord_polar(theta = "y") +
  scale_fill_manual(labels=c("genetic data", "NcPoint ratio", "NcRange ratio", "no data available"),
                      breaks=c("genetic data", "NcPoint ratio", "NcRange ratio", "no data available"),
                      values=c("darkgreen", "#0072B2", "#E69F00", "grey80")) +
  xlim(c(0.2, hsize + 0.5)) + theme_void()
p

Data availability for PM indicator

Total taxa with NA in extinct populations:

sum(is.na(indicators_full$n_extint_populations))
## [1] 417

Percentage of missing data

sum(is.na(indicators_full$n_extint_populations))/nrow(indicators_full)
## [1] 0.4246436

Total taxa with data availability on extinct pops

sum(!is.na(indicators_full$n_extint_populations))
## [1] 565

Percentage of taxa with data availability on extinct pops (which also includes NA for extant, see above)

sum(!is.na(indicators_full$n_extint_populations))/nrow(indicators_full)
## [1] 0.5753564

Data availability for at least one indicator

Data availability for at least one indicator. Including multiassesments

# number
x<- indicators_full %>%
     filter(popsize_data=="yes" | !is.na(n_extint_populations))
nrow(x)
## [1] 817
# percentage
nrow(x) / nrow(indicators_full)
## [1] 0.8319756

Data availability for at least one indicator. Keeping only one of the multiassesments

# number
x<- indicators_averaged_one %>%
     filter(popsize_data=="yes" | !is.na(n_extint_populations))
nrow(x)
## [1] 765
# percentage
nrow(x) / nrow(indicators_averaged_one)
## [1] 0.8324266

Session Info for reproducibility purposes:

sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] glmmTMB_1.1.7      knitr_1.39         lme4_1.1-31        Matrix_1.5-3      
##  [5] cowplot_1.1.1      viridis_0.6.3      viridisLite_0.4.0  alluvial_0.1-2    
##  [9] ggnewscale_0.4.9   ggsankey_0.0.99999 ggplot2_3.4.1      stringr_1.4.0     
## [13] utile.tools_0.2.7  readr_2.1.2        dplyr_1.0.9        tidyr_1.2.0       
## 
## loaded via a namespace (and not attached):
##  [1] TMB_1.9.6           tidyselect_1.1.2    xfun_0.31          
##  [4] bslib_0.3.1         purrr_0.3.4         splines_4.2.1      
##  [7] lattice_0.20-45     colorspace_2.0-3    vctrs_0.5.2        
## [10] generics_0.1.3      htmltools_0.5.5     yaml_2.3.5         
## [13] utf8_1.2.2          rlang_1.0.6         nloptr_2.0.3       
## [16] jquerylib_0.1.4     pillar_1.7.0        glue_1.6.2         
## [19] withr_2.5.0         DBI_1.1.3           lifecycle_1.0.3    
## [22] munsell_0.5.0       gtable_0.3.0        evaluate_0.15      
## [25] labeling_0.4.2      tzdb_0.3.0          fastmap_1.1.0      
## [28] fansi_1.0.3         highr_0.9           Rcpp_1.0.10        
## [31] scales_1.2.0        jsonlite_1.8.0      farver_2.1.1       
## [34] gridExtra_2.3       hms_1.1.1           digest_0.6.29      
## [37] stringi_1.7.6       numDeriv_2016.8-1.1 grid_4.2.1         
## [40] cli_3.6.0           tools_4.2.1         magrittr_2.0.3     
## [43] sass_0.4.1          tibble_3.1.7        crayon_1.5.1       
## [46] pkgconfig_2.0.3     MASS_7.3-57         ellipsis_0.3.2     
## [49] minqa_1.2.5         assertthat_0.2.1    rmarkdown_2.14     
## [52] rstudioapi_0.13     boot_1.3-28         R6_2.5.1           
## [55] nlme_3.1-157        compiler_4.2.1